use std::error::Error; use std::io::stdin; use std::path::Path; use std::time::Instant; use heed::EnvOpenOptions; use milli::{ execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext, SearchLogger, TermsMatchingStrategy, TimeBudget, }; #[global_allocator] static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; fn main() -> Result<(), Box> { let mut args = std::env::args(); let program_name = args.next().expect("No program name"); let dataset = args.next().unwrap_or_else(|| { panic!( "Missing path to index. Usage: {} [] [print-documents]", program_name ) }); let detailed_logger_dir = args.next(); let print_documents: bool = if let Some(arg) = args.next() { arg == "print-documents" } else { false }; let mut options = EnvOpenOptions::new(); options.map_size(100 * 1024 * 1024 * 1024); // 100 GB let index = Index::new(options, dataset)?; let txn = index.read_txn()?; let dictionary = index.document_decompression_dictionary(&txn).unwrap(); let mut query = String::new(); while stdin().read_line(&mut query)? > 0 { for _ in 0..2 { let mut default_logger = DefaultSearchLogger; // FIXME: consider resetting the state of the logger between search executions as otherwise panics are possible. // Workaround'd here by recreating the logger on each iteration of the loop let mut detailed_logger = detailed_logger_dir .as_ref() .map(|logger_dir| (milli::VisualSearchLogger::default(), logger_dir)); let logger: &mut dyn SearchLogger<_> = if let Some((detailed_logger, _)) = detailed_logger.as_mut() { detailed_logger } else { &mut default_logger }; let start = Instant::now(); let mut ctx = SearchContext::new(&index, &txn)?; let mut buffer = Vec::new(); let universe = filtered_universe(ctx.index, ctx.txn, &None)?; let docs = execute_search( &mut ctx, (!query.trim().is_empty()).then(|| query.trim()), TermsMatchingStrategy::Last, milli::score_details::ScoringStrategy::Skip, false, universe, &None, &None, GeoSortStrategy::default(), 0, 20, None, &mut DefaultSearchLogger, logger, TimeBudget::max(), None, )?; if let Some((logger, dir)) = detailed_logger { logger.finish(&mut ctx, Path::new(dir))?; } let elapsed = start.elapsed(); println!("new: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids); if print_documents { let compressed_documents = index .compressed_documents(&txn, docs.documents_ids.iter().copied()) .unwrap() .into_iter() .map(|(id, compressed_obkv)| { let obkv = compressed_obkv .decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref()) .unwrap(); let mut object = serde_json::Map::default(); for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() { let value = obkv.get(fid).unwrap(); let value: serde_json::Value = serde_json::from_slice(value).unwrap(); object.insert(fid_name.to_owned(), value); } (id, serde_json::to_string_pretty(&object).unwrap()) }) .collect::>(); for (id, document) in compressed_documents { println!("{id}:"); println!("{document}"); } let compressed_documents = index .compressed_documents(&txn, docs.documents_ids.iter().copied()) .unwrap() .into_iter() .map(|(id, compressed_obkv)| { let mut object = serde_json::Map::default(); let obkv = compressed_obkv .decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref()) .unwrap(); for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() { let value = obkv.get(fid).unwrap(); let value: serde_json::Value = serde_json::from_slice(value).unwrap(); object.insert(fid_name.to_owned(), value); } (id, serde_json::to_string_pretty(&object).unwrap()) }) .collect::>(); println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids); for (id, document) in compressed_documents { println!("{id}:"); println!("{document}"); } } } query.clear(); } Ok(()) }