MeiliSearch/milli/examples/search.rs

114 lines
4.5 KiB
Rust
Raw Normal View History

2023-03-22 14:50:41 +01:00
use std::error::Error;
use std::io::stdin;
use std::time::Instant;
use heed::EnvOpenOptions;
use milli::{
2023-03-27 17:49:43 +02:00
execute_search, DefaultSearchLogger, Index, SearchContext, SearchLogger, TermsMatchingStrategy,
2023-03-22 14:50:41 +01:00
};
// Register mimalloc as the global allocator for this example binary.
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn main() -> Result<(), Box<dyn Error>> {
let mut args = std::env::args();
2023-03-27 17:49:43 +02:00
let program_name = args.next().expect("No program name");
let dataset = args.next().unwrap_or_else(|| {
panic!(
2023-03-27 17:49:43 +02:00
"Missing path to index. Usage: {} <PATH-TO-INDEX> [<logger-dir>] [print-documents]",
program_name
)
});
let detailed_logger = args.next();
let print_documents: bool =
if let Some(arg) = args.next() { arg == "print-documents" } else { false };
2023-03-22 14:50:41 +01:00
let mut options = EnvOpenOptions::new();
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
let index = Index::new(options, dataset)?;
let txn = index.read_txn()?;
let mut query = String::new();
while stdin().read_line(&mut query)? > 0 {
2023-03-23 09:12:35 +01:00
for _ in 0..2 {
2023-03-27 17:49:43 +02:00
let mut default_logger = DefaultSearchLogger;
// FIXME: consider resetting the state of the logger between search executions as otherwise panics are possible.
// Workaround'd here by recreating the logger on each iteration of the loop
let mut detailed_logger = detailed_logger
.as_ref()
.map(|logger_dir| milli::DetailedSearchLogger::new(logger_dir));
let logger: &mut dyn SearchLogger<_> =
if let Some(detailed_logger) = detailed_logger.as_mut() {
detailed_logger
} else {
&mut default_logger
};
2023-03-23 09:12:35 +01:00
2023-03-22 14:50:41 +01:00
let start = Instant::now();
2023-03-27 17:49:43 +02:00
2023-03-22 14:50:41 +01:00
let mut ctx = SearchContext::new(&index, &txn);
let docs = execute_search(
&mut ctx,
2023-03-27 17:49:43 +02:00
&(!query.trim().is_empty()).then(|| query.trim().to_owned()),
2023-03-22 14:50:41 +01:00
// what a the from which when there is
TermsMatchingStrategy::Last,
2023-03-27 17:49:43 +02:00
&None,
2023-03-22 14:50:41 +01:00
0,
20,
2023-03-27 17:49:43 +02:00
None,
2023-03-22 14:50:41 +01:00
&mut DefaultSearchLogger,
2023-03-27 17:49:43 +02:00
logger,
2023-03-22 14:50:41 +01:00
)?;
2023-03-27 17:49:43 +02:00
if let Some(logger) = &detailed_logger {
logger.write_d2_description(&mut ctx);
}
2023-03-22 14:50:41 +01:00
let elapsed = start.elapsed();
println!("new: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids);
2023-03-27 17:49:43 +02:00
if print_documents {
let documents = index
.documents(&txn, docs.documents_ids.iter().copied())
.unwrap()
.into_iter()
.map(|(id, obkv)| {
let mut object = serde_json::Map::default();
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
let value = obkv.get(fid).unwrap();
let value: serde_json::Value = serde_json::from_slice(value).unwrap();
object.insert(fid_name.to_owned(), value);
}
(id, serde_json::to_string_pretty(&object).unwrap())
})
.collect::<Vec<_>>();
2023-03-22 14:50:41 +01:00
2023-03-27 17:49:43 +02:00
for (id, document) in documents {
println!("{id}:");
println!("{document}");
}
2023-03-22 14:50:41 +01:00
2023-03-27 17:49:43 +02:00
let documents = index
.documents(&txn, docs.documents_ids.iter().copied())
.unwrap()
.into_iter()
.map(|(id, obkv)| {
let mut object = serde_json::Map::default();
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
let value = obkv.get(fid).unwrap();
let value: serde_json::Value = serde_json::from_slice(value).unwrap();
object.insert(fid_name.to_owned(), value);
}
(id, serde_json::to_string_pretty(&object).unwrap())
})
.collect::<Vec<_>>();
println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids);
for (id, document) in documents {
println!("{id}:");
println!("{document}");
}
}
2023-03-22 14:50:41 +01:00
}
query.clear();
}
Ok(())
}