MeiliSearch/examples/query-database.rs

171 lines
5.3 KiB
Rust
Raw Normal View History

// Registers jemalloc as the global allocator for this binary.
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
use std::collections::btree_map::{BTreeMap, Entry};
use std::iter::FromIterator;
2018-12-10 15:13:25 +01:00
use std::io::{self, Write};
use std::path::PathBuf;
use std::error::Error;
use hashbrown::{HashMap, HashSet};
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
2018-12-10 15:13:25 +01:00
use structopt::StructOpt;
use meilidb::database::schema::SchemaAttr;
2018-12-10 15:13:25 +01:00
use meilidb::database::Database;
use meilidb::Match;
2018-12-10 15:13:25 +01:00
// Command-line options of the query example, parsed with `structopt`.
//
// NOTE: this header is a plain comment on purpose; a `///` doc comment on the
// struct would presumably be picked up by structopt as the program's "about"
// text and change the `--help` output.
#[derive(Debug, StructOpt)]
pub struct Opt {
    /// The path of the existing database to open and query
    #[structopt(parse(from_os_str))]
    pub database_path: PathBuf,

    /// Fields that must be displayed.
    pub displayed_fields: Vec<String>,

    /// The number of returned results
    #[structopt(short = "n", long = "number-results", default_value = "10")]
    pub number_results: usize,
}
/// A document as read back from the database view: a map from field name to
/// the field's text, looked up by the names given in `Opt::displayed_fields`.
type Document = HashMap<String, String>;
2018-12-10 15:13:25 +01:00
/// Writes `text` to stdout, colouring every other segment in yellow.
///
/// `ranges` is a list of byte offsets into `text`. Each pair of consecutive
/// offsets delimits one segment; the first segment is printed with the
/// default colour, the second in yellow, the third with the default colour
/// again, and so on.
///
/// # Errors
///
/// Returns any I/O error raised while writing to or recolouring the terminal.
///
/// # Panics
///
/// Panics if a pair of offsets is not a valid slice range of `text`
/// (decreasing, out of bounds, or not on a character boundary).
fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
    let mut stdout = StandardStream::stdout(ColorChoice::Always);

    for (position, bounds) in ranges.windows(2).enumerate() {
        // `windows(2)` only ever yields slices of exactly two offsets.
        let (start, end) = (bounds[0], bounds[1]);

        // Segments at odd positions are the highlighted ones.
        if position % 2 == 1 {
            let mut yellow = ColorSpec::new();
            yellow.set_fg(Some(Color::Yellow));
            stdout.set_color(&yellow)?;
        }

        write!(&mut stdout, "{}", &text[start..end])?;
        // Always go back to the default colour before the next segment.
        stdout.reset()?;
    }

    Ok(())
}
/// Converts a range expressed in characters into the same range in bytes.
///
/// `index` is the position of the first character of the range and `length`
/// its number of characters. The result is `(byte_index, byte_length)`, the
/// starting byte offset and the size in bytes of that range inside `text`.
///
/// The range is clamped to the text instead of producing a bogus result:
/// - a range running past the end of `text` stops at `text.len()`;
/// - an `index` past the last character yields `(text.len(), 0)`;
/// - a zero `length` yields an empty range located at `index`.
///
/// The returned offsets always fall on character boundaries, so
/// `&text[byte_index..byte_index + byte_length]` never panics.
fn char_to_byte_range(index: usize, length: usize, text: &str) -> (usize, usize) {
    // Starting byte offset of every character, in order.
    let mut starts = text.char_indices().map(|(offset, _)| offset);

    let byte_index = starts.nth(index).unwrap_or(text.len());
    let byte_end = match length {
        0 => byte_index,
        // `starts` now sits on character `index + 1`, so the character at
        // `index + length` is `length - 1` steps further.
        _ => starts.nth(length - 1).unwrap_or(text.len()),
    };

    (byte_index, byte_end - byte_index)
}
/// Computes the byte offsets delimiting the parts of `text` to highlight.
///
/// Only the `matches` whose attribute equals `attribute` are considered. The
/// character position and length of each one are converted to bytes with
/// `char_to_byte_range`.
///
/// The returned vector starts with `0` and ends with `text.len()`; in
/// between, offsets come in `(start, end)` pairs, one per highlighted range,
/// in increasing order. This is the layout `display_highlights` expects:
/// consecutive offsets delimit segments that are alternately plain and
/// highlighted.
///
/// Ranges that overlap are merged into a single one. Emitting them separately
/// would interleave their boundaries and invert the plain/highlighted
/// alternation for the rest of the text.
fn create_highlight_areas(text: &str, matches: &[Match], attribute: SchemaAttr) -> Vec<usize> {
    // Longest match length seen for each starting byte offset; the map keeps
    // the offsets sorted.
    let mut byte_indexes = BTreeMap::new();

    for match_ in matches {
        let match_attribute = match_.attribute.attribute();
        if SchemaAttr::new(match_attribute) != attribute {
            continue;
        }

        let word_area = match_.word_area;
        let char_index = word_area.char_index() as usize;
        let char_length = word_area.length() as usize;
        let (byte_index, byte_length) = char_to_byte_range(char_index, char_length, text);

        match byte_indexes.entry(byte_index) {
            Entry::Vacant(entry) => { entry.insert(byte_length); },
            Entry::Occupied(mut entry) => {
                if *entry.get() < byte_length {
                    entry.insert(byte_length);
                }
            },
        }
    }

    let mut areas = Vec::with_capacity(2 * byte_indexes.len() + 2);
    areas.push(0);

    // End offset of the highlighted range currently being built, if any.
    let mut highlight_end = None;
    for (start, length) in byte_indexes {
        let end = start + length;
        match highlight_end {
            // Starts inside the previous range: extend it rather than adding
            // a new pair of boundaries.
            Some(current_end) if start < current_end => {
                if end > current_end {
                    areas.pop();
                    areas.push(end);
                    highlight_end = Some(end);
                }
            },
            _ => {
                areas.push(start);
                areas.push(end);
                highlight_end = Some(end);
            },
        }
    }

    areas.push(text.len());
    areas
}
2018-12-10 15:13:25 +01:00
fn main() -> Result<(), Box<Error>> {
2019-01-06 15:01:09 +01:00
let _ = env_logger::init();
2018-12-10 15:13:25 +01:00
let opt = Opt::from_args();
let (elapsed, result) = elapsed::measure_time(|| Database::open(&opt.database_path));
let database = result?;
2018-12-10 15:30:28 +01:00
println!("database prepared for you in {}", elapsed);
2018-12-10 15:13:25 +01:00
let mut buffer = String::new();
let input = io::stdin();
loop {
2018-12-10 15:30:28 +01:00
print!("Searching for: ");
2018-12-10 15:13:25 +01:00
io::stdout().flush()?;
if input.read_line(&mut buffer)? == 0 { break }
let query = buffer.trim_end_matches('\n');
2018-12-10 15:13:25 +01:00
let view = database.view();
let schema = view.schema();
2018-12-10 15:13:25 +01:00
let (elapsed, documents) = elapsed::measure_time(|| {
let builder = view.query_builder().unwrap();
builder.query(query, 0..opt.number_results)
2018-12-10 15:13:25 +01:00
});
let number_of_documents = documents.len();
for doc in documents {
match view.document_by_id::<Document>(doc.id) {
Ok(document) => {
for name in &opt.displayed_fields {
let attr = match schema.attribute(name) {
Some(attr) => attr,
None => continue,
};
let text = match document.get(name) {
Some(text) => text,
None => continue,
};
print!("{}: ", name);
let areas = create_highlight_areas(&text, &doc.matches, attr);
display_highlights(&text, &areas)?;
println!();
}
},
2018-12-10 15:13:25 +01:00
Err(e) => eprintln!("{}", e),
}
let mut matching_attributes = HashSet::new();
for _match in doc.matches {
let attr = SchemaAttr::new(_match.attribute.attribute());
let name = schema.attribute_name(attr);
matching_attributes.insert(name);
}
let matching_attributes = Vec::from_iter(matching_attributes);
println!("matching in: {:?}", matching_attributes);
println!();
2018-12-10 15:13:25 +01:00
}
2019-01-06 15:01:09 +01:00
eprintln!("===== Found {} results in {} =====", number_of_documents, elapsed);
2018-12-10 15:13:25 +01:00
buffer.clear();
}
Ok(())
}