use std::collections::{BTreeSet, HashSet}; use std::fs::create_dir_all; use std::marker::PhantomData; use std::ops::Deref; use std::path::Path; use std::sync::Arc; use heed::{EnvOpenOptions, RoTxn}; use milli::update::Setting; use milli::{obkv_to_json, FieldId}; use serde_json::{Map, Value}; use error::Result; pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; pub use updates::{Checked, Facets, Settings, Unchecked}; use crate::EnvSizer; use crate::index_controller::update_file_store::UpdateFileStore; use self::error::IndexError; pub mod error; pub mod update_handler; mod dump; mod search; mod updates; pub type Document = Map; #[derive(Clone)] pub struct Index { pub inner: Arc, update_file_store: Arc, } impl Deref for Index { type Target = milli::Index; fn deref(&self) -> &Self::Target { self.inner.as_ref() } } impl Index { pub fn open(path: impl AsRef, size: usize, update_file_store: Arc) -> Result { create_dir_all(&path)?; let mut options = EnvOpenOptions::new(); options.map_size(size); let inner = Arc::new(milli::Index::new(options, &path)?); Ok(Index { inner, update_file_store }) } pub fn settings(&self) -> Result> { let txn = self.read_txn()?; self.settings_txn(&txn) } pub fn settings_txn(&self, txn: &RoTxn) -> Result> { let displayed_attributes = self .displayed_fields(txn)? .map(|fields| fields.into_iter().map(String::from).collect()); let searchable_attributes = self .searchable_fields(txn)? .map(|fields| fields.into_iter().map(String::from).collect()); let filterable_attributes = self.filterable_fields(txn)?.into_iter().collect(); let sortable_attributes = self.sortable_fields(txn)?.into_iter().collect(); let criteria = self .criteria(txn)? .into_iter() .map(|c| c.to_string()) .collect(); let stop_words = self .stop_words(txn)? .map(|stop_words| -> Result> { Ok(stop_words.stream().into_strs()?.into_iter().collect()) }) .transpose()? .unwrap_or_else(BTreeSet::new); let distinct_field = self.distinct_field(txn)?.map(String::from); // in milli each word in the synonyms map were split on their separator. Since we lost // this information we are going to put space between words. let synonyms = self .synonyms(txn)? .iter() .map(|(key, values)| { ( key.join(" "), values.iter().map(|value| value.join(" ")).collect(), ) }) .collect(); Ok(Settings { displayed_attributes: match displayed_attributes { Some(attrs) => Setting::Set(attrs), None => Setting::Reset, }, searchable_attributes: match searchable_attributes { Some(attrs) => Setting::Set(attrs), None => Setting::Reset, }, filterable_attributes: Setting::Set(filterable_attributes), sortable_attributes: Setting::Set(sortable_attributes), ranking_rules: Setting::Set(criteria), stop_words: Setting::Set(stop_words), distinct_attribute: match distinct_field { Some(field) => Setting::Set(field), None => Setting::Reset, }, synonyms: Setting::Set(synonyms), _kind: PhantomData, }) } pub fn retrieve_documents>( &self, offset: usize, limit: usize, attributes_to_retrieve: Option>, ) -> Result>> { let txn = self.read_txn()?; let fields_ids_map = self.fields_ids_map(&txn)?; let fields_to_display = self.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)?; let iter = self.documents.range(&txn, &(..))?.skip(offset).take(limit); let mut documents = Vec::new(); for entry in iter { let (_id, obkv) = entry?; let object = obkv_to_json(&fields_to_display, &fields_ids_map, obkv)?; documents.push(object); } Ok(documents) } pub fn retrieve_document>( &self, doc_id: String, attributes_to_retrieve: Option>, ) -> Result> { let txn = self.read_txn()?; let fields_ids_map = self.fields_ids_map(&txn)?; let fields_to_display = self.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)?; let internal_id = self .external_documents_ids(&txn)? .get(doc_id.as_bytes()) .ok_or_else(|| IndexError::DocumentNotFound(doc_id.clone()))?; let document = self .documents(&txn, std::iter::once(internal_id))? .into_iter() .next() .map(|(_, d)| d) .ok_or(IndexError::DocumentNotFound(doc_id))?; let document = obkv_to_json(&fields_to_display, &fields_ids_map, document)?; Ok(document) } pub fn size(&self) -> u64 { self.env.size() } fn fields_to_display>( &self, txn: &heed::RoTxn, attributes_to_retrieve: &Option>, fields_ids_map: &milli::FieldsIdsMap, ) -> Result> { let mut displayed_fields_ids = match self.displayed_fields_ids(txn)? { Some(ids) => ids.into_iter().collect::>(), None => fields_ids_map.iter().map(|(id, _)| id).collect(), }; let attributes_to_retrieve_ids = match attributes_to_retrieve { Some(attrs) => attrs .iter() .filter_map(|f| fields_ids_map.id(f.as_ref())) .collect::>(), None => fields_ids_map.iter().map(|(id, _)| id).collect(), }; displayed_fields_ids.retain(|fid| attributes_to_retrieve_ids.contains(fid)); Ok(displayed_fields_ids) } }