feat: Make a more debug-oriented search tool

This commit is contained in:
Kerollmops 2018-06-24 19:27:24 +02:00 committed by Clément Renault
parent b406fb4aed
commit 79d8555620
3 changed files with 69 additions and 14 deletions

View File

@ -17,6 +17,11 @@ name = "cfg-if"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "elapsed"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "env_logger"
version = "0.3.5"
@ -109,6 +114,7 @@ dependencies = [
name = "raptor-search"
version = "0.1.0"
dependencies = [
"elapsed 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state)",
"raptor 0.1.0",
@ -169,6 +175,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bda13183df33055cbb84b847becce220d392df502ebe7a4a78d7021771ed94d0"
"checksum byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "74c0b906e9446b0a2e4f760cdb3fa4b2c48cdc6db8766a845c54b6ff063fd2e9"
"checksum cfg-if 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "405216fd8fe65f718daa7102ea808a946b6ce40c742998fbfd3463645552de18"
"checksum elapsed 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6f4e5af126dafd0741c2ad62d47f68b28602550102e5f0dd45c8a97fc8b49c29"
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
"checksum fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state)" = "<none>"
"checksum group-by 0.1.0 (git+https://github.com/Kerollmops/group-by.git)" = "<none>"

View File

@ -8,6 +8,7 @@ env_logger = { version = "0.3", default-features = false }
raptor = { path = ".." }
serde = "1.0"
serde_derive = "1.0"
elapsed = "0.1"
[dependencies.fst]
git = "https://github.com/Kerollmops/fst.git"

View File

@ -1,22 +1,16 @@
extern crate env_logger;
extern crate fst;
extern crate raptor;
extern crate elapsed;
use std::env;
use std::{env, fs};
use std::process::Command;
use std::io::{self, Write};
use elapsed::measure_time;
use fst::Streamer;
use raptor::{load_map, RankedStream, LevBuilder};
fn main() {
drop(env_logger::init());
let lev_builder = LevBuilder::new();
let map = load_map("map.fst", "values.vecs").unwrap();
let query = env::args().nth(1).expect("Please enter query words!");
let query = query.to_lowercase();
println!("Searching for: {:?}", query);
use raptor::{load_map, DocIndexMap, RankedStream, LevBuilder};
fn search(map: &DocIndexMap, lev_builder: &LevBuilder, query: &str) {
let mut automatons = Vec::new();
for query in query.split_whitespace() {
let lev = lev_builder.build_automaton(query);
@ -26,6 +20,59 @@ fn main() {
let limit: Option<usize> = env::var("RAPTOR_OUTPUT_LIMIT").ok().and_then(|x| x.parse().ok());
let mut stream = RankedStream::new(&map, map.values(), automatons, limit.unwrap_or(20));
while let Some(document_id) = stream.next() {
println!("{:?}", document_id);
print!("{:?}", document_id);
/* Debugging aid only: looks the document id up in "products.json_lines" with `rg` and prints the start of its title.
if let Ok(_) = fs::File::open("products.json_lines") {
let output = Command::new("rg")
.arg(document_id.to_string())
.arg("products.json_lines")
.output();
if let Ok(Ok(output)) = output.map(|o| String::from_utf8(o.stdout)) {
if let Some(line) = output.lines().next() {
let pattern = "\"title\":";
if let Some(index) = line.find(pattern) {
let line: String = line[index..].chars().skip(pattern.len()).take(100).collect();
print!(" => {}", line);
}
}
}
}
// */
println!();
}
}
/// Loads the index once, reports how long that took, then runs searches.
///
/// When a first command-line argument is present it is used as the single
/// query. Otherwise queries are read from standard input, one per line, until
/// a line that is empty after trimming is entered.
fn main() {
    // The result of the logger initialisation is discarded on purpose.
    let _ = env_logger::init();

    // Building the Levenshtein builder and loading the map are timed together.
    let (load_time, (lev_builder, map)) = measure_time(|| {
        let builder = LevBuilder::new();
        let index = load_map("map.fst", "values.vecs").unwrap();
        (builder, index)
    });
    println!("Loaded in {}", load_time);

    // One-shot mode: the query comes from the command line.
    if let Some(raw_query) = env::args().nth(1) {
        // The query is echoed as typed; only the search itself is lowercased.
        println!("Searching for: {:?}", raw_query);
        let lowered = raw_query.to_lowercase();
        let (search_time, _) = measure_time(|| search(&map, &lev_builder, &lowered));
        println!("Finished in {}", search_time);
        return;
    }

    // Interactive mode: prompt, read a line, search, repeat.
    loop {
        print!("Searching for: ");
        // `print!` does not end the line, so flush to make the prompt visible.
        io::stdout().flush().unwrap();

        let mut line = String::new();
        io::stdin().read_line(&mut line).unwrap();

        let query = line.trim().to_lowercase();
        if query.is_empty() {
            break;
        }

        let (search_time, _) = measure_time(|| search(&map, &lev_builder, &query));
        println!("Finished in {}", search_time);
    }
}