From e0e5e87ed37a66bfc06e715eb8ddda7293fd1849 Mon Sep 17 00:00:00 2001 From: Quentin de Quelen Date: Mon, 14 Jan 2019 15:34:56 +0100 Subject: [PATCH] feat: HTTP server example can use stopwords --- examples/http-server.rs | 49 ++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/examples/http-server.rs b/examples/http-server.rs index bf3aef508..61859a5dc 100644 --- a/examples/http-server.rs +++ b/examples/http-server.rs @@ -3,12 +3,14 @@ static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; use log::{error, info}; use std::error::Error; +use std::ffi::OsStr; use std::fmt; +use std::fs::{self, File}; +use std::io::{self, BufRead, BufReader}; +use std::net::SocketAddr; use std::path::{PathBuf, Path}; use std::sync::Arc; use std::time::SystemTime; -use std::fs; -use std::ffi::OsStr; use hashbrown::{HashMap, HashSet}; use chashmap::CHashMap; @@ -30,9 +32,13 @@ pub struct Opt { #[structopt(parse(from_os_str))] pub database_path: PathBuf, + /// The address and port to bind the server to. + #[structopt(short = "l", default_value = "127.0.0.1:8080")] + pub listen_addr: SocketAddr, + /// The path to the list of stop words (one by line). #[structopt(long = "stop-words", parse(from_os_str))] - pub stop_words_path: Option, + pub stop_words: PathBuf, } // @@ -73,14 +79,16 @@ impl From> for DatabaseError { pub struct MultiDatabase { databases: CHashMap, db_path: PathBuf, + stop_words: HashSet, } impl MultiDatabase { - pub fn new(path: PathBuf) -> MultiDatabase { + pub fn new(path: PathBuf, stop_words: HashSet) -> MultiDatabase { MultiDatabase { databases: CHashMap::new(), - db_path: path + db_path: path, + stop_words: stop_words } } @@ -178,6 +186,20 @@ fn get_file_name_from_path(path: &str) -> Option<&str> { Path::new(path).file_stem().and_then(OsStr::to_str) } +fn retrieve_stop_words(path: &Path) -> io::Result> { + let f = File::open(path)?; + let reader = BufReader::new(f); + let mut words = HashSet::new(); + + for line in reader.lines() { + let line = line?; + let word = line.trim().to_string(); + words.insert(word); + } + + Ok(words) +} + // // PARAMS & BODY FOR HTTPS HANDLERS // @@ -271,7 +293,7 @@ fn ingest(index_name: String, body: IngestBody, db: Arc) -> Resul } } - let stop_words = HashSet::new(); + let stop_words = &db.stop_words; if let Some(documents) = body.insert { for doc in documents { if let Err(e) = update.update_document(doc, &tokenizer_builder, &stop_words) { @@ -355,7 +377,7 @@ fn search(index_name: String, query: SearchQuery, db: Arc) -> Res Ok(response) } -fn start_server(db: Arc) { +fn start_server(listen_addr: SocketAddr, db: Arc) { let index_path = warp::path("index").and(warp::path::param::()); let db = warp::any().map(move || db.clone()); @@ -390,19 +412,24 @@ fn start_server(db: Arc) { let routes = api.with(logs).with(headers); - warp::serve(routes) - .run(([127, 0, 0, 1], 8080)); + info!("Server is started on {}", listen_addr); + warp::serve(routes).run(listen_addr); } fn main() { env_logger::init(); let opt = Opt::from_args(); - let db = Arc::new(MultiDatabase::new(opt.database_path.clone())); + let stop_words = match retrieve_stop_words(&opt.stop_words) { + Ok(s) => s, + Err(_) => HashSet::new(), + }; + + let db = Arc::new(MultiDatabase::new(opt.database_path.clone(), stop_words)); db.load_existing(); - start_server(db); + start_server(opt.listen_addr, db); }