Merge pull request #67 from meilisearch/fix-settings

Fix displayed and searchable attributes
Clément Renault, 2021-01-26 14:03:43 +01:00 (committed by GitHub)
commit 30dae0205e
15 changed files with 1028 additions and 878 deletions
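Reviewer note: at a glance, this commit moves the settings API from internal field ids to user-facing field names, with ids resolved internally through the `FieldsIdsMap`. A minimal, hedged sketch of the resulting usage (the `milli::update::Settings` path and the index setup are assumptions taken from the settings diff and tests below):

```rust
use milli::update::Settings; // assumed path, per the settings diff below

// Settings are now expressed as field *names*; ids are resolved internally.
let mut wtxn = index.write_txn()?;
let mut builder = Settings::new(&mut wtxn, &index);
builder.set_displayed_fields(vec!["title".to_string()]);
builder.set_searchable_fields(vec!["title".to_string(), "description".to_string()]);
builder.execute(|_| ())?;
wtxn.commit()?;
```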

Cargo.lock (generated): 481 changes (diff suppressed because it is too large)

http-ui/Cargo.lock (generated): 632 changes (diff suppressed because it is too large)

============================================================

@@ -1,4 +1,3 @@
-use std::borrow::Cow;
 use std::collections::{HashMap, HashSet};
 use std::fmt::Display;
 use std::fs::{File, create_dir_all};
@@ -654,13 +653,13 @@ async fn main() -> anyhow::Result<()> {
             let mut documents = Vec::new();
             let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
-            let displayed_fields = match index.displayed_fields(&rtxn).unwrap() {
-                Some(fields) => Cow::Borrowed(fields),
-                None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()),
+            let displayed_fields = match index.displayed_fields_ids(&rtxn).unwrap() {
+                Some(fields) => fields,
+                None => fields_ids_map.iter().map(|(id, _)| id).collect(),
             };
             let attributes_to_highlight = match index.searchable_fields(&rtxn).unwrap() {
-                Some(fields) => fields.iter().flat_map(|id| fields_ids_map.name(*id)).map(ToOwned::to_owned).collect(),
-                None => fields_ids_map.iter().map(|(_, name)| name).map(ToOwned::to_owned).collect(),
+                Some(fields) => fields.into_iter().map(String::from).collect(),
+                None => fields_ids_map.iter().map(|(_, name)| name).map(String::from).collect(),
             };
             let stop_words = fst::Set::default();
@@ -690,9 +689,9 @@ async fn main() -> anyhow::Result<()> {
             let external_documents_ids = index.external_documents_ids(&rtxn).unwrap();
             let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
-            let displayed_fields = match index.displayed_fields(&rtxn).unwrap() {
-                Some(fields) => Cow::Borrowed(fields),
-                None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()),
+            let displayed_fields = match index.displayed_fields_ids(&rtxn).unwrap() {
+                Some(fields) => fields,
+                None => fields_ids_map.iter().map(|(id, _)| id).collect(),
             };
             match external_documents_ids.get(&id) {

============================================================

@@ -1,10 +1,12 @@
-use crate::{FieldsIdsMap, FieldId};
+use std::collections::HashMap;
+
 use anyhow::{Context, bail};
 use regex::Regex;
 use serde::{Serialize, Deserialize};
-#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq, Eq)]
+use crate::facet::FacetType;
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
 pub enum Criterion {
     /// Sorted by increasing number of typos.
     Typo,
@@ -21,13 +23,13 @@ pub enum Criterion {
     /// Sorted by the similarity of the matched words with the query words.
     Exactness,
     /// Sorted by the increasing value of the field specified.
-    Asc(FieldId),
+    Asc(String),
     /// Sorted by the decreasing value of the field specified.
-    Desc(FieldId),
 }

 impl Criterion {
-    pub fn from_str(fields_ids_map: &mut FieldsIdsMap, txt: &str) -> anyhow::Result<Criterion> {
+    pub fn from_str(faceted_attributes: &HashMap<String, FacetType>, txt: &str) -> anyhow::Result<Criterion> {
         match txt {
             "typo" => Ok(Criterion::Typo),
             "words" => Ok(Criterion::Words),
@@ -40,22 +42,15 @@ impl Criterion {
                 let caps = re.captures(text).with_context(|| format!("unknown criterion name: {}", text))?;
                 let order = caps.get(1).unwrap().as_str();
                 let field_name = caps.get(2).unwrap().as_str();
-                let field_id = fields_ids_map.insert(field_name).context("field id limit reached")?;
+                faceted_attributes.get(field_name).with_context(|| format!("Can't use {:?} as a criterion as it isn't a faceted field.", field_name))?;
                 match order {
-                    "asc" => Ok(Criterion::Asc(field_id)),
-                    "desc" => Ok(Criterion::Desc(field_id)),
+                    "asc" => Ok(Criterion::Asc(field_name.to_string())),
+                    "desc" => Ok(Criterion::Desc(field_name.to_string())),
                    otherwise => bail!("unknown criterion name: {}", otherwise),
                 }
             },
         }
     }
-
-    pub fn field_id(&self) -> Option<FieldId> {
-        match *self {
-            Criterion::Asc(fid) | Criterion::Desc(fid) => Some(fid),
-            _ => None,
-        }
-    }
 }

 pub fn default_criteria() -> Vec<Criterion> {
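Reviewer note: with this change, `asc(...)`/`desc(...)` criteria are validated against the faceted attributes instead of allocating a field id on the fly. A minimal, hedged usage sketch (the `milli::` paths are assumptions; the behavior mirrors the code above):

```rust
use std::collections::HashMap;

use milli::facet::FacetType; // assumed re-export paths
use milli::Criterion;

fn main() -> anyhow::Result<()> {
    let mut faceted = HashMap::new();
    faceted.insert("price".to_string(), FacetType::Integer);

    // A faceted field is accepted and kept by name...
    let c = Criterion::from_str(&faceted, "asc(price)")?;
    assert_eq!(c, Criterion::Asc("price".to_string()));

    // ...while a non-faceted field is now rejected up front.
    assert!(Criterion::from_str(&faceted, "asc(title)").is_err());
    Ok(())
}
```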

============================================================

@@ -112,8 +112,8 @@ impl Index {
     /* primary key */

     /// Writes the documents primary key, this is the field name that is used to store the id.
-    pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: FieldId) -> heed::Result<()> {
-        self.main.put::<_, Str, OwnedType<FieldId>>(wtxn, PRIMARY_KEY_KEY, &primary_key)
+    pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> {
+        self.main.put::<_, Str, Str>(wtxn, PRIMARY_KEY_KEY, &primary_key)
     }

     /// Deletes the primary key of the documents, this can be done to reset indexes settings.
@@ -122,8 +122,8 @@ impl Index {
     }

     /// Returns the documents primary key, `None` if it hasn't been defined.
-    pub fn primary_key(&self, rtxn: &RoTxn) -> heed::Result<Option<FieldId>> {
-        self.main.get::<_, Str, OwnedType<FieldId>>(rtxn, PRIMARY_KEY_KEY)
+    pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t str>> {
+        self.main.get::<_, Str, Str>(rtxn, PRIMARY_KEY_KEY)
     }

     /* external documents ids */
@@ -175,10 +175,10 @@ impl Index {
     /* displayed fields */

-    /// Writes the fields ids that must be displayed in the defined order.
+    /// Writes the fields that must be displayed in the defined order.
     /// There must not be any duplicate field id.
-    pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[FieldId]) -> heed::Result<()> {
-        self.main.put::<_, Str, ByteSlice>(wtxn, DISPLAYED_FIELDS_KEY, fields)
+    pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
+        self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, DISPLAYED_FIELDS_KEY, &fields)
     }

     /// Deletes the displayed fields ids, this will make the engine to display
@@ -187,18 +187,27 @@ impl Index {
         self.main.delete::<_, Str>(wtxn, DISPLAYED_FIELDS_KEY)
     }

-    /// Returns the displayed fields ids in the order they must be returned. If it returns
-    /// `None` it means that all the attributes are displayed in the order of the `FieldsIdsMap`.
-    pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [FieldId]>> {
-        self.main.get::<_, Str, ByteSlice>(rtxn, DISPLAYED_FIELDS_KEY)
+    /// Returns the displayed fields in the order they were set by the user. If it returns
+    /// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`.
+    pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
+        self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, DISPLAYED_FIELDS_KEY)
+    }
+
+    pub fn displayed_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<Option<Vec<FieldId>>> {
+        let fields_ids_map = self.fields_ids_map(rtxn)?;
+        let ids = self.displayed_fields(rtxn)?
+            .map(|fields| fields
+                .into_iter()
+                .map(|name| fields_ids_map.id(name).expect("Field not found"))
+                .collect::<Vec<_>>());
+        Ok(ids)
     }

     /* searchable fields */

     /// Writes the searchable fields, when this list is specified, only these are indexed.
-    pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[FieldId]) -> heed::Result<()> {
-        assert!(fields.windows(2).all(|win| win[0] < win[1])); // is sorted
-        self.main.put::<_, Str, ByteSlice>(wtxn, SEARCHABLE_FIELDS_KEY, fields)
+    pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
+        self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, SEARCHABLE_FIELDS_KEY, &fields)
     }

     /// Deletes the searchable fields, when no fields are specified, all fields are indexed.
@@ -206,17 +215,36 @@ impl Index {
         self.main.delete::<_, Str>(wtxn, SEARCHABLE_FIELDS_KEY)
     }

-    /// Returns the searchable fields ids, those are the fields that are indexed,
+    /// Returns the searchable fields, those are the fields that are indexed,
     /// if the searchable fields aren't there it means that **all** the fields are indexed.
-    pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [FieldId]>> {
-        self.main.get::<_, Str, ByteSlice>(rtxn, SEARCHABLE_FIELDS_KEY)
+    pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
+        self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, SEARCHABLE_FIELDS_KEY)
+    }
+
+    /// Identical to `searchable_fields`, but returns the ids instead.
+    pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<Option<Vec<FieldId>>> {
+        match self.searchable_fields(rtxn)? {
+            Some(names) => {
+                let fields_map = self.fields_ids_map(rtxn)?;
+                let mut ids = Vec::new();
+                for name in names {
+                    let id = fields_map
+                        .id(name)
+                        .ok_or_else(|| format!("field id map must contain {:?}", name))
+                        .expect("corrupted data: ");
+                    ids.push(id);
+                }
+                Ok(Some(ids))
+            }
+            None => Ok(None),
+        }
     }

     /* faceted fields */

-    /// Writes the facet fields ids associated with their facet type or `None` if
+    /// Writes the facet fields associated with their facet type or `None` if
     /// the facet type is currently unknown.
-    pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<FieldId, FacetType>) -> heed::Result<()> {
+    pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<String, FacetType>) -> heed::Result<()> {
         self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields_types)
     }

@@ -225,9 +253,26 @@ impl Index {
         self.main.delete::<_, Str>(wtxn, FACETED_FIELDS_KEY)
     }

-    /// Returns the facet fields ids associated with their facet type.
-    pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result<HashMap<FieldId, FacetType>> {
-        Ok(self.main.get::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY)?.unwrap_or_default())
+    /// Returns the facet fields names associated with their facet type.
+    pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, FacetType>> {
+        Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FACETED_FIELDS_KEY)?.unwrap_or_default())
+    }
+
+    /// Same as `faceted_fields`, but returns ids instead.
+    pub fn faceted_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<HashMap<FieldId, FacetType>> {
+        let faceted_fields = self.faceted_fields(rtxn)?;
+        let fields_ids_map = self.fields_ids_map(rtxn)?;
+        let faceted_fields = faceted_fields
+            .iter()
+            .map(|(k, v)| {
+                let kid = fields_ids_map
+                    .id(k)
+                    .ok_or_else(|| format!("{:?} should be present in the field id map", k))
+                    .expect("corrupted data: ");
+                (kid, *v)
+            })
+            .collect();
+        Ok(faceted_fields)
     }

     /* faceted documents ids */
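Reviewer note on the resulting API shape: user-facing settings are stored and returned as names, while the new `*_ids` helpers resolve those names through the `FieldsIdsMap` for internal consumers. A minimal, hedged example (assumes an open `index` whose fields map already contains the names, which the `Settings` update guarantees before writing):

```rust
let mut wtxn = index.write_txn()?;
index.put_displayed_fields(&mut wtxn, &["title", "overview"])?;
wtxn.commit()?;

let rtxn = index.read_txn()?;
let names = index.displayed_fields(&rtxn)?;    // Some(vec!["title", "overview"])
let ids = index.displayed_fields_ids(&rtxn)?;  // the same fields, resolved to FieldIds
```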

============================================================

@@ -148,7 +148,7 @@ impl FacetCondition {
     ) -> anyhow::Result<FacetCondition>
     {
         let fields_ids_map = index.fields_ids_map(rtxn)?;
-        let faceted_fields = index.faceted_fields(rtxn)?;
+        let faceted_fields = index.faceted_fields_ids(rtxn)?;
         let lexed = FilterParser::parse(Rule::prgm, expression)?;
         FacetCondition::from_pairs(&fields_ids_map, &faceted_fields, lexed)
     }
@@ -552,15 +552,15 @@ mod tests {
         // Test that the facet condition is correctly generated.
         let rtxn = index.read_txn().unwrap();
         let condition = FacetCondition::from_str(&rtxn, &index, "channel = ponce").unwrap();
-        let expected = OperatorString(1, FacetStringOperator::equal("Ponce"));
+        let expected = OperatorString(0, FacetStringOperator::equal("Ponce"));
         assert_eq!(condition, expected);

         let condition = FacetCondition::from_str(&rtxn, &index, "channel != ponce").unwrap();
-        let expected = OperatorString(1, FacetStringOperator::not_equal("ponce"));
+        let expected = OperatorString(0, FacetStringOperator::not_equal("ponce"));
         assert_eq!(condition, expected);

         let condition = FacetCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap();
-        let expected = OperatorString(1, FacetStringOperator::not_equal("ponce"));
+        let expected = OperatorString(0, FacetStringOperator::not_equal("ponce"));
         assert_eq!(condition, expected);
     }
@@ -581,13 +581,13 @@ mod tests {
         // Test that the facet condition is correctly generated.
         let rtxn = index.read_txn().unwrap();
         let condition = FacetCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap();
-        let expected = OperatorI64(1, Between(22, 44));
+        let expected = OperatorI64(0, Between(22, 44));
         assert_eq!(condition, expected);

         let condition = FacetCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap();
         let expected = Or(
-            Box::new(OperatorI64(1, LowerThan(22))),
-            Box::new(OperatorI64(1, GreaterThan(44))),
+            Box::new(OperatorI64(0, LowerThan(22))),
+            Box::new(OperatorI64(0, GreaterThan(44))),
         );
         assert_eq!(condition, expected);
     }

============================================================

@@ -285,9 +285,13 @@ impl<'a> Search<'a> {
             }
         }).next();
         match result {
-            Some((fid, is_ascending)) => {
-                let faceted_fields = self.index.faceted_fields(self.rtxn)?;
-                let ftype = *faceted_fields.get(&fid).context("unknown field id")?;
+            Some((attr_name, is_ascending)) => {
+                let field_id_map = self.index.fields_ids_map(self.rtxn)?;
+                let fid = field_id_map.id(&attr_name).with_context(|| format!("unknown field: {:?}", attr_name))?;
+                let faceted_fields = self.index.faceted_fields_ids(self.rtxn)?;
+                let ftype = *faceted_fields.get(&fid)
+                    .with_context(|| format!("{:?} not found in the faceted fields.", attr_name))
+                    .expect("corrupted data: ");
                 Some((fid, ftype, is_ascending))
             },
             None => None,

============================================================

@@ -342,7 +342,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyhow::Result<()> {
         if heap.len() > limit { heap.pop(); }
     }

-    let faceted_fields = index.faceted_fields(rtxn)?;
+    let faceted_fields = index.faceted_fields_ids(rtxn)?;
     let fields_ids_map = index.fields_ids_map(rtxn)?;
     for (field_id, field_type) in faceted_fields {
         let facet_name = fields_ids_map.name(field_id).unwrap();
@@ -413,7 +413,7 @@ fn words_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, words: Vec<String>) -> anyhow::Result<()> {
 fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_name: String) -> anyhow::Result<()> {
     let fields_ids_map = index.fields_ids_map(&rtxn)?;
-    let faceted_fields = index.faceted_fields(&rtxn)?;
+    let faceted_fields = index.faceted_fields_ids(&rtxn)?;
     let field_id = fields_ids_map.id(&field_name)
         .with_context(|| format!("field {} not found", field_name))?;
@@ -451,7 +451,7 @@ fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_name: String) -> anyhow::Result<()> {
 fn facet_stats(index: &Index, rtxn: &heed::RoTxn, field_name: String) -> anyhow::Result<()> {
     let fields_ids_map = index.fields_ids_map(&rtxn)?;
-    let faceted_fields = index.faceted_fields(&rtxn)?;
+    let faceted_fields = index.faceted_fields_ids(&rtxn)?;
     let field_id = fields_ids_map.id(&field_name)
         .with_context(|| format!("field {} not found", field_name))?;

============================================================

@@ -1,4 +1,3 @@
-use std::borrow::Cow;
 use std::io::{self, BufRead, Write};
 use std::iter::once;
 use std::path::PathBuf;
@@ -47,9 +46,9 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
     let index = Index::new(options, &opt.database)?;
     let rtxn = index.read_txn()?;
     let fields_ids_map = index.fields_ids_map(&rtxn)?;
-    let displayed_fields = match index.displayed_fields(&rtxn)? {
-        Some(fields) => Cow::Borrowed(fields),
-        None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()),
+    let displayed_fields = match index.displayed_fields_ids(&rtxn)? {
+        Some(fields) => fields,
+        None => fields_ids_map.iter().map(|(id, _)| id).collect(),
     };

     let stdin = io::stdin();

============================================================

@@ -25,7 +25,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
         // We retrieve the number of documents ids that we are deleting.
         let number_of_documents = self.index.number_of_documents(self.wtxn)?;
-        let faceted_fields = self.index.faceted_fields(self.wtxn)?;
+        let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;

         // We clean some of the main engine datastructures.
         self.index.put_words_fst(self.wtxn, &fst::Set::default())?;

============================================================

@@ -188,7 +188,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
         drop(iter);

         // Remove the documents ids from the faceted documents ids.
-        let faceted_fields = self.index.faceted_fields(self.wtxn)?;
+        let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
         for (field_id, facet_type) in faceted_fields {
             let mut docids = self.index.faceted_documents_ids(self.wtxn, field_id)?;
             docids.difference_with(&self.documents_ids);

============================================================

@@ -51,7 +51,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
     pub fn execute(self) -> anyhow::Result<()> {
         // We get the faceted fields to be able to create the facet levels.
-        let faceted_fields = self.index.faceted_fields(self.wtxn)?;
+        let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;

         debug!("Computing and writing the facet values levels docids into LMDB on disk...");
         for (field_id, facet_type) in faceted_fields {

============================================================

@@ -338,8 +338,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
             FacetLevel0ValuesDocids,
         }

-        let faceted_fields = self.index.faceted_fields(self.wtxn)?;
-        let searchable_fields: HashSet<_> = match self.index.searchable_fields(self.wtxn)? {
+        let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
+        let searchable_fields: HashSet<_> = match self.index.searchable_fields_ids(self.wtxn)? {
            Some(fields) => fields.iter().copied().collect(),
             None => fields_ids_map.iter().map(|(id, _name)| id).collect(),
         };
@@ -485,7 +485,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
         self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;

         // We write the primary key field id into the main database
-        self.index.put_primary_key(self.wtxn, primary_key)?;
+        self.index.put_primary_key(self.wtxn, &primary_key)?;

         // We write the external documents ids into the main database.
         self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;

============================================================

@@ -10,13 +10,15 @@ use log::info;
 use roaring::RoaringBitmap;
 use serde_json::{Map, Value};

-use crate::{BEU32, MergeFn, Index, FieldId, FieldsIdsMap, ExternalDocumentsIds};
+use crate::{Index, BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId};
 use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
 use super::merge_function::merge_two_obkvs;
 use super::{create_writer, create_sorter, IndexDocumentsMethod};

+const DEFAULT_PRIMARY_KEY_NAME: &str = "id";
+
 pub struct TransformOutput {
-    pub primary_key: FieldId,
+    pub primary_key: String,
     pub fields_ids_map: FieldsIdsMap,
     pub external_documents_ids: ExternalDocumentsIds<'static>,
     pub new_documents_ids: RoaringBitmap,
@@ -73,7 +75,6 @@ impl Transform<'_, '_> {
     {
         let mut fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
         let external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap();
-        let primary_key = self.index.primary_key(self.rtxn)?;

         // Deserialize the whole batch of documents in memory.
         let mut documents: Peekable<Box<dyn Iterator<Item=serde_json::Result<Map<String, Value>>>>> = if is_stream {
@@ -88,27 +89,15 @@ impl Transform<'_, '_> {
         };

         // We extract the primary key from the first document in
-        // the batch if it hasn't already been defined in the index.
-        let primary_key = match primary_key {
-            Some(primary_key) => primary_key,
-            None => {
-                // We ignore a potential error here as we can't early return it now,
-                // the peek method gives us only a reference on the next item,
-                // we will eventually return it in the iteration just after.
-                let first = documents.peek().and_then(|r| r.as_ref().ok());
-                match first.and_then(|doc| doc.keys().find(|k| k.contains("id"))) {
-                    Some(key) => fields_ids_map.insert(&key).context("field id limit reached")?,
-                    None => {
-                        if !self.autogenerate_docids {
-                            // If there is no primary key in the current document batch, we must
-                            // return an error and not automatically generate any document id.
-                            return Err(anyhow!("missing primary key"))
-                        }
-                        fields_ids_map.insert("id").context("field id limit reached")?
-                    },
-                }
-            },
-        };
+        // the batch if it hasn't already been defined in the index
+        let first = documents.peek().and_then(|r| r.as_ref().ok());
+        let alternative_name = first.and_then(|doc| doc.keys().find(|k| k.contains(DEFAULT_PRIMARY_KEY_NAME)).cloned());
+        let (primary_key_id, primary_key) = compute_primary_key_pair(
+            self.index.primary_key(self.rtxn)?,
+            &mut fields_ids_map,
+            alternative_name,
+            self.autogenerate_docids
+        )?;

         if documents.peek().is_none() {
             return Ok(TransformOutput {
@@ -122,13 +111,6 @@ impl Transform<'_, '_> {
             });
         }

-        // Get the primary key field name now, this way we will
-        // be able to get the value in the JSON Map document.
-        let primary_key_name = fields_ids_map
-            .name(primary_key)
-            .expect("found the primary key name")
-            .to_owned();
-
         // We must choose the appropriate merge function for when two or more documents
         // with the same user id must be merged or fully replaced in the same batch.
         let merge_function = match self.index_documents_method {
@@ -170,7 +152,7 @@ impl Transform<'_, '_> {
             // We retrieve the user id from the document based on the primary key name,
             // if the document id isn't present we generate a uuid.
-            let external_id = match document.get(&primary_key_name) {
+            let external_id = match document.get(&primary_key) {
                 Some(value) => match value {
                     Value::String(string) => Cow::Borrowed(string.as_str()),
                     Value::Number(number) => Cow::Owned(number.to_string()),
@@ -196,7 +178,7 @@ impl Transform<'_, '_> {
                     serde_json::to_writer(&mut json_buffer, value)?;
                     writer.insert(field_id, &json_buffer)?;
                 }
-                else if field_id == primary_key {
+                else if field_id == primary_key_id {
                     // We validate the document id [a-zA-Z0-9\-_].
                     let external_id = match validate_document_id(&external_id) {
                         Some(valid) => valid,
@@ -240,42 +222,37 @@ impl Transform<'_, '_> {
         let mut csv = csv::Reader::from_reader(reader);
         let headers = csv.headers()?;

-        let primary_key = self.index.primary_key(self.rtxn)?;
+        // Generate the new fields ids based on the current fields ids and this CSV headers.
         let mut fields_ids = Vec::new();
-        // Generate the new fields ids based on the current fields ids and this CSV headers.
         for (i, header) in headers.iter().enumerate() {
             let id = fields_ids_map.insert(header).context("field id limit reached)")?;
             fields_ids.push((id, i));
         }

         // Extract the position of the primary key in the current headers, None if not found.
-        let external_id_pos = match primary_key {
+        let primary_key_pos = match self.index.primary_key(self.rtxn)? {
             Some(primary_key) => {
-                // Te primary key have is known so we must find the position in the CSV headers.
-                let name = fields_ids_map.name(primary_key).expect("found the primary key name");
-                headers.iter().position(|h| h == name)
+                // The primary key is known so we must find the position in the CSV headers.
+                headers.iter().position(|h| h == primary_key)
             },
             None => headers.iter().position(|h| h.contains("id")),
         };

-        // Returns the field id in the fileds ids map, create an "id" field
+        // Returns the field id in the fields ids map, create an "id" field
         // in case it is not in the current headers.
-        let primary_key_field_id = match external_id_pos {
-            Some(pos) => fields_ids_map.id(&headers[pos]).expect("found the primary key"),
-            None => {
-                if !self.autogenerate_docids {
-                    // If there is no primary key in the current document batch, we must
-                    // return an error and not automatically generate any document id.
-                    return Err(anyhow!("missing primary key"))
-                }
-                let field_id = fields_ids_map.insert("id").context("field id limit reached")?;
-                // We make sure to add the primary key field id to the fields ids,
-                // this way it is added to the obks.
-                fields_ids.push((field_id, usize::max_value()));
-                field_id
-            },
-        };
+        let alternative_name = primary_key_pos.map(|pos| headers[pos].to_string());
+        let (primary_key_id, _) = compute_primary_key_pair(
+            self.index.primary_key(self.rtxn)?,
+            &mut fields_ids_map,
+            alternative_name,
+            self.autogenerate_docids
+        )?;
+
+        // The primary key field is not present in the header, so we need to create it.
+        if primary_key_pos.is_none() {
+            fields_ids.push((primary_key_id, usize::max_value()));
+        }

         // We sort the fields ids by the fields ids map id, this way we are sure to iterate over
         // the records fields in the fields ids map order and correctly generate the obkv.
@@ -310,7 +287,7 @@ impl Transform<'_, '_> {
             }

             // We extract the user id if we know where it is or generate an UUID V4 otherwise.
-            let external_id = match external_id_pos {
+            let external_id = match primary_key_pos {
                 Some(pos) => {
                     let external_id = &record[pos];
                     // We validate the document id [a-zA-Z0-9\-_].
@@ -326,7 +303,7 @@ impl Transform<'_, '_> {
             // we return the generated document id instead of the record field.
             let iter = fields_ids.iter()
                 .map(|(fi, i)| {
-                    let field = if *fi == primary_key_field_id { external_id } else { &record[*i] };
+                    let field = if *fi == primary_key_id { external_id } else { &record[*i] };
                     (fi, field)
                 });
@@ -349,9 +326,13 @@ impl Transform<'_, '_> {

         // Now that we have a valid sorter that contains the user id and the obkv we
         // give it to the last transforming function which returns the TransformOutput.
+        let primary_key_name = fields_ids_map
+            .name(primary_key_id)
+            .map(String::from)
+            .expect("Primary key must be present in fields id map");
+
         self.output_from_sorter(
             sorter,
-            primary_key_field_id,
+            primary_key_name,
             fields_ids_map,
             documents_count,
             external_documents_ids,
@@ -365,7 +346,7 @@ impl Transform<'_, '_> {
     fn output_from_sorter<F>(
         self,
         sorter: grenad::Sorter<MergeFn>,
-        primary_key: FieldId,
+        primary_key: String,
         fields_ids_map: FieldsIdsMap,
         approximate_number_of_documents: usize,
         mut external_documents_ids: ExternalDocumentsIds<'_>,
@@ -477,11 +458,11 @@ impl Transform<'_, '_> {
     // TODO this can be done in parallel by using the rayon `ThreadPool`.
     pub fn remap_index_documents(
         self,
-        primary_key: FieldId,
-        fields_ids_map: FieldsIdsMap,
+        primary_key: String,
+        old_fields_ids_map: FieldsIdsMap,
+        new_fields_ids_map: FieldsIdsMap,
     ) -> anyhow::Result<TransformOutput>
     {
-        let current_fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
         let external_documents_ids = self.index.external_documents_ids(self.rtxn)?;
         let documents_ids = self.index.documents_ids(self.rtxn)?;
         let documents_count = documents_ids.len() as usize;
@@ -499,8 +480,8 @@ impl Transform<'_, '_> {
             let mut obkv_writer = obkv::KvWriter::new(&mut obkv_buffer);

             // We iterate over the new `FieldsIdsMap` ids in order and construct the new obkv.
-            for (id, name) in fields_ids_map.iter() {
-                if let Some(val) = current_fields_ids_map.id(name).and_then(|id| obkv.get(id)) {
+            for (id, name) in new_fields_ids_map.iter() {
+                if let Some(val) = old_fields_ids_map.id(name).and_then(|id| obkv.get(id)) {
                     obkv_writer.insert(id, val)?;
                 }
             }
@@ -516,7 +497,7 @@ impl Transform<'_, '_> {
         Ok(TransformOutput {
             primary_key,
-            fields_ids_map,
+            fields_ids_map: new_fields_ids_map,
             external_documents_ids: external_documents_ids.into_static(),
             new_documents_ids: documents_ids,
             replaced_documents_ids: RoaringBitmap::default(),
@@ -526,6 +507,42 @@ impl Transform<'_, '_> {
     }
 }

+/// Given an optional primary key and an optional alternative name, returns the (field_id, attr_name)
+/// for the primary key according to the following rules:
+/// - if primary_key is `Some`, returns the id and the name, else
+/// - if alternative_name is Some, adds alternative to the fields_ids_map, and returns the pair, else
+/// - if autogenerate_docids is true, insert the default id value in the field ids map ("id") and
+///   returns the pair, else
+/// - returns an error.
+fn compute_primary_key_pair(
+    primary_key: Option<&str>,
+    fields_ids_map: &mut FieldsIdsMap,
+    alternative_name: Option<String>,
+    autogenerate_docids: bool,
+) -> anyhow::Result<(FieldId, String)> {
+    match primary_key {
+        Some(primary_key) => {
+            let id = fields_ids_map.id(primary_key).expect("primary key must be present in the fields id map");
+            Ok((id, primary_key.to_string()))
+        }
+        None => {
+            let name = match alternative_name {
+                Some(key) => key,
+                None => {
+                    if !autogenerate_docids {
+                        // If there is no primary key in the current document batch, we must
+                        // return an error and not automatically generate any document id.
+                        anyhow::bail!("missing primary key")
+                    }
+                    DEFAULT_PRIMARY_KEY_NAME.to_string()
+                },
+            };
+            let id = fields_ids_map.insert(&name).context("field id limit reached")?;
+            Ok((id, name))
+        },
+    }
+}
+
 /// Only the last value associated with an id is kept.
 fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
     obkvs.last().context("no last value").map(|last| last.clone().into_owned())
@@ -552,3 +569,73 @@ fn validate_document_id(document_id: &str) -> Option<&str> {
         })
     })
 }
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    mod compute_primary_key {
+        use super::compute_primary_key_pair;
+        use super::FieldsIdsMap;
+
+        #[test]
+        #[should_panic]
+        fn should_panic_primary_key_not_in_map() {
+            let mut fields_map = FieldsIdsMap::new();
+            let _result = compute_primary_key_pair(
+                Some("toto"),
+                &mut fields_map,
+                None,
+                false);
+        }
+
+        #[test]
+        fn should_return_primary_key_if_is_some() {
+            let mut fields_map = FieldsIdsMap::new();
+            fields_map.insert("toto").unwrap();
+            let result = compute_primary_key_pair(
+                Some("toto"),
+                &mut fields_map,
+                Some("tata".to_string()),
+                false);
+            assert_eq!(result.unwrap(), (0u8, "toto".to_string()));
+            assert_eq!(fields_map.len(), 1);
+        }
+
+        #[test]
+        fn should_return_alternative_if_primary_is_none() {
+            let mut fields_map = FieldsIdsMap::new();
+            let result = compute_primary_key_pair(
+                None,
+                &mut fields_map,
+                Some("tata".to_string()),
+                false);
+            assert_eq!(result.unwrap(), (0u8, "tata".to_string()));
+            assert_eq!(fields_map.len(), 1);
+        }
+
+        #[test]
+        fn should_return_default_if_both_are_none() {
+            let mut fields_map = FieldsIdsMap::new();
+            let result = compute_primary_key_pair(
+                None,
+                &mut fields_map,
+                None,
+                true);
+            assert_eq!(result.unwrap(), (0u8, "id".to_string()));
+            assert_eq!(fields_map.len(), 1);
+        }
+
+        #[test]
+        fn should_return_err_if_both_are_none_and_recompute_is_false() {
+            let mut fields_map = FieldsIdsMap::new();
+            let result = compute_primary_key_pair(
+                None,
+                &mut fields_map,
+                None,
                false);
+            assert!(result.is_err());
+            assert_eq!(fields_map.len(), 0);
+        }
+    }
+}
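Reviewer sketch of the helper's contract, mirroring the unit tests above (the helper is module-private, so this would live in the same `mod test`; it is illustrative, not part of the commit):

```rust
#[test]
fn contract_sketch() {
    let mut map = FieldsIdsMap::new();

    // 1. A stored primary key wins, and must already be in the map.
    map.insert("uid").unwrap();
    assert_eq!(compute_primary_key_pair(Some("uid"), &mut map, None, false).unwrap().1, "uid");

    // 2. Otherwise the alternative name found in the batch is inserted and used.
    assert_eq!(compute_primary_key_pair(None, &mut map, Some("docid".into()), false).unwrap().1, "docid");

    // 3. Otherwise "id" is created, but only when autogenerate_docids is on;
    //    with it off, the addition fails with "missing primary key".
    assert_eq!(compute_primary_key_pair(None, &mut map, None, true).unwrap().1, "id");
    assert!(compute_primary_key_pair(None, &mut map, None, false).is_err());
}
```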

============================================================

@@ -1,14 +1,16 @@
 use std::collections::HashMap;
 use std::str::FromStr;

-use anyhow::{ensure, Context};
+use anyhow::Context;
 use grenad::CompressionType;
+use itertools::Itertools;
 use rayon::ThreadPool;

+use crate::criterion::Criterion;
+use crate::facet::FacetType;
 use crate::update::index_documents::{Transform, IndexDocumentsMethod};
 use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
-use crate::facet::FacetType;
-use crate::{Index, FieldsIdsMap, Criterion};
+use crate::{Index, FieldsIdsMap};

 pub struct Settings<'a, 't, 'u, 'i> {
     wtxn: &'t mut heed::RwTxn<'i, 'u>,
@@ -26,7 +28,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
     // however if it is `Some(None)` it means that the user forced a reset of the setting.
     searchable_fields: Option<Option<Vec<String>>>,
     displayed_fields: Option<Option<Vec<String>>>,
-    faceted_fields: Option<HashMap<String, String>>,
+    faceted_fields: Option<Option<HashMap<String, String>>>,
     criteria: Option<Option<Vec<String>>>,
 }

@@ -67,7 +69,11 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
     }

     pub fn set_faceted_fields(&mut self, names_facet_types: HashMap<String, String>) {
-        self.faceted_fields = Some(names_facet_types);
+        self.faceted_fields = Some(Some(names_facet_types));
+    }
+
+    pub fn reset_faceted_fields(&mut self) {
+        self.faceted_fields = Some(None);
     }

     pub fn reset_criteria(&mut self) {
@@ -78,183 +84,188 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         self.criteria = Some(Some(criteria));
     }

-    pub fn execute<F>(self, progress_callback: F) -> anyhow::Result<()>
+    fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()>
     where
-        F: Fn(UpdateIndexingStep) + Sync
+        F: Fn(UpdateIndexingStep) + Sync,
     {
-        let mut updated_searchable_fields = None;
-        let mut updated_faceted_fields = None;
-        let mut updated_displayed_fields = None;
-        let mut updated_criteria = None;
-
-        // Construct the new FieldsIdsMap based on the searchable fields order.
         let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
-        let mut fields_ids_map = match self.searchable_fields {
-            Some(Some(searchable_fields)) => {
-                let mut new_fields_ids_map = FieldsIdsMap::new();
-                let mut new_searchable_fields = Vec::new();
-
-                for name in searchable_fields {
-                    let id = new_fields_ids_map.insert(&name).context("field id limit reached")?;
-                    new_searchable_fields.push(id);
-                }
-
-                for (_, name) in fields_ids_map.iter() {
-                    new_fields_ids_map.insert(name).context("field id limit reached")?;
-                }
-
-                updated_searchable_fields = Some(Some(new_searchable_fields));
-                new_fields_ids_map
-            },
-            Some(None) => {
-                updated_searchable_fields = Some(None);
-                fields_ids_map
-            },
-            None => fields_ids_map,
-        };
-
-        // We compute or generate the new primary key field id.
-        // TODO make the primary key settable.
-        let primary_key = match self.index.primary_key(&self.wtxn)? {
-            Some(id) => {
-                let current_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
-                let name = current_fields_ids_map.name(id).unwrap();
-                fields_ids_map.insert(name).context("field id limit reached")?
-            },
-            None => fields_ids_map.insert("id").context("field id limit reached")?,
-        };
-
-        let current_faceted_fields = self.index.faceted_fields(self.wtxn)?;
-        if let Some(fields_names_facet_types) = self.faceted_fields {
-            let mut faceted_fields = HashMap::new();
-            for (name, sftype) in fields_names_facet_types {
-                let ftype = FacetType::from_str(&sftype).with_context(|| format!("parsing facet type {:?}", sftype))?;
-                let id = fields_ids_map.insert(&name).context("field id limit reached")?;
-                match current_faceted_fields.get(&id) {
-                    Some(pftype) => {
-                        ensure!(ftype == *pftype, "{} facet type changed from {} to {}", name, ftype, pftype);
-                        faceted_fields.insert(id, ftype)
-                    },
-                    None => faceted_fields.insert(id, ftype),
-                };
-            }
-            updated_faceted_fields = Some(faceted_fields);
-        }
-
-        // Check that the displayed attributes have been specified.
-        if let Some(value) = self.displayed_fields {
-            match value {
-                Some(names) => {
-                    let mut new_displayed_fields = Vec::new();
-                    for name in names {
-                        let id = fields_ids_map.insert(&name).context("field id limit reached")?;
-                        new_displayed_fields.push(id);
-                    }
-                    updated_displayed_fields = Some(Some(new_displayed_fields));
-                }
-                None => updated_displayed_fields = Some(None),
-            }
-        }
-
-        if let Some(criteria) = self.criteria {
-            match criteria {
-                Some(criteria_names) => {
-                    let mut new_criteria = Vec::new();
-                    for name in criteria_names {
-                        let criterion = Criterion::from_str(&mut fields_ids_map, &name)?;
-                        if let Some(fid) = criterion.field_id() {
-                            let name = fields_ids_map.name(fid).unwrap();
-                            let faceted_fields = updated_faceted_fields.as_ref().unwrap_or(&current_faceted_fields);
-                            ensure!(faceted_fields.contains_key(&fid), "criterion field {} must be faceted", name);
-                        }
-                        new_criteria.push(criterion);
-                    }
-                    updated_criteria = Some(Some(new_criteria));
-                },
-                None => updated_criteria = Some(None),
-            }
-        }
-
-        // If any setting have modified any of the datastructures it means that we need
-        // to retrieve the documents and then reindex then with the new settings.
-        if updated_searchable_fields.is_some() || updated_faceted_fields.is_some() {
-            let transform = Transform {
-                rtxn: &self.wtxn,
-                index: self.index,
-                log_every_n: self.log_every_n,
-                chunk_compression_type: self.chunk_compression_type,
-                chunk_compression_level: self.chunk_compression_level,
-                chunk_fusing_shrink_size: self.chunk_fusing_shrink_size,
-                max_nb_chunks: self.max_nb_chunks,
-                max_memory: self.max_memory,
-                index_documents_method: IndexDocumentsMethod::ReplaceDocuments,
-                autogenerate_docids: false,
-            };
-
-            // We remap the documents fields based on the new `FieldsIdsMap`.
-            let output = transform.remap_index_documents(primary_key, fields_ids_map.clone())?;
-
-            // We write the new FieldsIdsMap to the database
-            // this way next indexing methods will be based on that.
-            self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
-
-            if let Some(faceted_fields) = updated_faceted_fields {
-                // We write the faceted_fields fields into the database here.
-                self.index.put_faceted_fields(self.wtxn, &faceted_fields)?;
-            }
-
-            if let Some(searchable_fields) = updated_searchable_fields {
-                // The new searchable fields are also written down to make sure
-                // that the IndexDocuments system takes only these ones into account.
-                match searchable_fields {
-                    Some(fields) => self.index.put_searchable_fields(self.wtxn, &fields)?,
-                    None => self.index.delete_searchable_fields(self.wtxn).map(drop)?,
-                }
-            }
-
-            // We clear the full database (words-fst, documents ids and documents content).
-            ClearDocuments::new(self.wtxn, self.index).execute()?;
-
-            // We index the generated `TransformOutput` which must contain
-            // all the documents with fields in the newly defined searchable order.
-            let mut indexing_builder = IndexDocuments::new(self.wtxn, self.index);
-            indexing_builder.log_every_n = self.log_every_n;
-            indexing_builder.max_nb_chunks = self.max_nb_chunks;
-            indexing_builder.max_memory = self.max_memory;
-            indexing_builder.linked_hash_map_size = self.linked_hash_map_size;
-            indexing_builder.chunk_compression_type = self.chunk_compression_type;
-            indexing_builder.chunk_compression_level = self.chunk_compression_level;
-            indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
-            indexing_builder.thread_pool = self.thread_pool;
-            indexing_builder.execute_raw(output, &progress_callback)?;
-        }
-
-        if let Some(displayed_fields) = updated_displayed_fields {
-            match displayed_fields {
-                Some(fields) => self.index.put_displayed_fields(self.wtxn, &fields)?,
-                None => self.index.delete_displayed_fields(self.wtxn).map(drop)?,
-            }
-        }
-
-        if let Some(criteria) = updated_criteria {
-            match criteria {
-                Some(criteria) => self.index.put_criteria(self.wtxn, &criteria)?,
-                None => self.index.delete_criteria(self.wtxn).map(drop)?,
-            }
-        }
+        // if the settings are set before any document update, we don't need to do anything, and
+        // will set the primary key during the first document addition.
+        if self.index.number_of_documents(&self.wtxn)? == 0 {
+            return Ok(())
+        }
+
+        let transform = Transform {
+            rtxn: &self.wtxn,
+            index: self.index,
+            log_every_n: self.log_every_n,
+            chunk_compression_type: self.chunk_compression_type,
+            chunk_compression_level: self.chunk_compression_level,
+            chunk_fusing_shrink_size: self.chunk_fusing_shrink_size,
+            max_nb_chunks: self.max_nb_chunks,
+            max_memory: self.max_memory,
+            index_documents_method: IndexDocumentsMethod::ReplaceDocuments,
+            autogenerate_docids: false,
+        };
+
+        // There already has been a document addition, the primary key should be set by now.
+        let primary_key = self.index.primary_key(&self.wtxn)?.context("Index must have a primary key")?;
+
+        // We remap the documents fields based on the new `FieldsIdsMap`.
+        let output = transform.remap_index_documents(
+            primary_key.to_string(),
+            old_fields_ids_map,
+            fields_ids_map.clone())?;
+
+        // We clear the full database (words-fst, documents ids and documents content).
+        ClearDocuments::new(self.wtxn, self.index).execute()?;
+
+        // We index the generated `TransformOutput` which must contain
+        // all the documents with fields in the newly defined searchable order.
+        let mut indexing_builder = IndexDocuments::new(self.wtxn, self.index);
+        indexing_builder.log_every_n = self.log_every_n;
+        indexing_builder.max_nb_chunks = self.max_nb_chunks;
+        indexing_builder.max_memory = self.max_memory;
+        indexing_builder.linked_hash_map_size = self.linked_hash_map_size;
+        indexing_builder.chunk_compression_type = self.chunk_compression_type;
+        indexing_builder.chunk_compression_level = self.chunk_compression_level;
+        indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
+        indexing_builder.thread_pool = self.thread_pool;
+        indexing_builder.execute_raw(output, &cb)?;

         Ok(())
     }

+    fn update_displayed(&mut self) -> anyhow::Result<bool> {
+        match self.displayed_fields {
+            Some(Some(ref fields)) => {
+                let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
+                // fields are deduplicated, only the first occurrence is taken into account
+                let names: Vec<_> = fields
+                    .iter()
+                    .unique()
+                    .map(String::as_str)
+                    .collect();
+
+                for name in names.iter() {
+                    fields_ids_map
+                        .insert(name)
+                        .context("field id limit exceeded")?;
+                }
+                self.index.put_displayed_fields(self.wtxn, &names)?;
+                self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
+            }
+            Some(None) => { self.index.delete_displayed_fields(self.wtxn)?; },
+            None => return Ok(false),
+        }
+        Ok(true)
+    }
+
+    /// Updates the index's searchable attributes. This causes the field map to be recomputed to
+    /// reflect the order of the searchable attributes.
+    fn update_searchable(&mut self) -> anyhow::Result<bool> {
+        match self.searchable_fields {
+            Some(Some(ref fields)) => {
+                // every time the searchable attributes are updated, we need to update the
+                // ids for any settings that uses the facets. (displayed_fields,
+                // faceted_fields)
+                let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
+
+                let mut new_fields_ids_map = FieldsIdsMap::new();
+                // fields are deduplicated, only the first occurrence is taken into account
+                let names = fields
+                    .iter()
+                    .unique()
+                    .map(String::as_str)
+                    .collect::<Vec<_>>();
+
+                // Add all the searchable attributes to the field map, and then add the
+                // remaining fields from the old field map to the new one
+                for name in names.iter() {
+                    new_fields_ids_map
+                        .insert(&name)
+                        .context("field id limit exceeded")?;
+                }
+
+                for (_, name) in old_fields_ids_map.iter() {
+                    new_fields_ids_map
+                        .insert(&name)
+                        .context("field id limit exceeded")?;
+                }
+
+                self.index.put_searchable_fields(self.wtxn, &names)?;
+                self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?;
+            }
+            Some(None) => { self.index.delete_searchable_fields(self.wtxn)?; },
+            None => return Ok(false),
+        }
+        Ok(true)
+    }
+
+    fn update_facets(&mut self) -> anyhow::Result<bool> {
+        match self.faceted_fields {
+            Some(Some(ref fields)) => {
+                let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
+                let mut new_facets = HashMap::new();
+                for (name, ty) in fields {
+                    fields_ids_map.insert(name).context("field id limit exceeded")?;
+                    let ty = FacetType::from_str(&ty)?;
+                    new_facets.insert(name.clone(), ty);
+                }
+                self.index.put_faceted_fields(self.wtxn, &new_facets)?;
+                self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
+            }
+            Some(None) => { self.index.delete_faceted_fields(self.wtxn)?; },
+            None => return Ok(false)
+        }
+        Ok(true)
+    }
+
+    fn update_criteria(&mut self) -> anyhow::Result<()> {
+        match self.criteria {
+            Some(Some(ref fields)) => {
+                let faceted_fields = self.index.faceted_fields(&self.wtxn)?;
+                let mut new_criteria = Vec::new();
+                for name in fields {
+                    let criterion = Criterion::from_str(&faceted_fields, &name)?;
+                    new_criteria.push(criterion);
+                }
+                self.index.put_criteria(self.wtxn, &new_criteria)?;
+            }
+            Some(None) => { self.index.delete_criteria(self.wtxn)?; }
+            None => (),
+        }
+        Ok(())
+    }
+
+    pub fn execute<F>(mut self, progress_callback: F) -> anyhow::Result<()>
+    where
+        F: Fn(UpdateIndexingStep) + Sync
+    {
+        let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?;
+        self.update_displayed()?;
+        let facets_updated = self.update_facets()?;
+        // update_criteria MUST be called after update_facets, since criterion fields must be set
+        // as facets.
+        self.update_criteria()?;
+        let searchable_updated = self.update_searchable()?;
+
+        if facets_updated || searchable_updated {
+            self.reindex(&progress_callback, old_fields_ids_map)?;
+        }
+        Ok(())
+    }
 }

 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::update::{IndexDocuments, UpdateFormat};
     use heed::EnvOpenOptions;
     use maplit::hashmap;
+
+    use crate::facet::FacetType;
+    use crate::update::{IndexDocuments, UpdateFormat};

     #[test]
     fn set_and_reset_searchable_fields() {
         let path = tempfile::tempdir().unwrap();
@@ -336,10 +347,8 @@ mod tests {
         // Check that the displayed fields are correctly set to `None` (default value).
         let rtxn = index.read_txn().unwrap();
-        let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
         let fields_ids = index.displayed_fields(&rtxn).unwrap();
-        let age_id = fields_ids_map.id("age").unwrap();
-        assert_eq!(fields_ids, Some(&[age_id][..]));
+        assert_eq!(fields_ids.unwrap(), (&["age"][..]));
         drop(rtxn);

         // We change the searchable fields to be the "name" field only.
@@ -351,10 +360,8 @@ mod tests {
         // Check that the displayed fields always contains only the "age" field.
         let rtxn = index.read_txn().unwrap();
-        let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
         let fields_ids = index.displayed_fields(&rtxn).unwrap();
-        let age_id = fields_ids_map.id("age").unwrap();
-        assert_eq!(fields_ids, Some(&[age_id][..]));
+        assert_eq!(fields_ids.unwrap(), &["age"][..]);
         drop(rtxn);
     }
@@ -402,10 +409,8 @@ mod tests {
         // Check that the displayed fields are correctly set to only the "age" field.
         let rtxn = index.read_txn().unwrap();
-        let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
-        let age_field_id = fields_ids_map.id("age").unwrap();
         let fields_ids = index.displayed_fields(&rtxn).unwrap();
-        assert_eq!(fields_ids.unwrap(), &[age_field_id][..]);
+        assert_eq!(fields_ids.unwrap(), &["age"][..]);
         drop(rtxn);

         // We reset the fields ids to become `None`, the default value.
@@ -445,9 +450,9 @@ mod tests {
         // Check that the displayed fields are correctly set.
         let rtxn = index.read_txn().unwrap();
         let fields_ids = index.faceted_fields(&rtxn).unwrap();
-        assert_eq!(fields_ids, hashmap!{ 1 => FacetType::Integer });
+        assert_eq!(fields_ids, hashmap!{ "age".to_string() => FacetType::Integer });
         // Only count the field_id 0 and level 0 facet values.
-        let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[1, 0]).unwrap().count();
+        let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count();
         assert_eq!(count, 3);
         drop(rtxn);
@@ -461,8 +466,49 @@ mod tests {
         let rtxn = index.read_txn().unwrap();
         // Only count the field_id 0 and level 0 facet values.
-        let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[1, 0]).unwrap().count();
+        let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count();
         assert_eq!(count, 4);
         drop(rtxn);
     }
+
+    #[test]
+    fn setting_searchable_recomputes_other_settings() {
+        let path = tempfile::tempdir().unwrap();
+        let mut options = EnvOpenOptions::new();
+        options.map_size(10 * 1024 * 1024); // 10 MB
+        let index = Index::new(options, &path).unwrap();
+
+        // Set all the settings except searchable
+        let mut wtxn = index.write_txn().unwrap();
+        let mut builder = Settings::new(&mut wtxn, &index);
+        builder.set_displayed_fields(vec!["hello".to_string()]);
+        builder.set_faceted_fields(hashmap!{
+            "age".into() => "integer".into(),
+            "toto".into() => "integer".into(),
+        });
+        builder.set_criteria(vec!["asc(toto)".to_string()]);
+        builder.execute(|_| ()).unwrap();
+        wtxn.commit().unwrap();
+
+        // check the output
+        let rtxn = index.read_txn().unwrap();
+        assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap());
+        // since no documents have been pushed the primary key is still unset
+        assert!(index.primary_key(&rtxn).unwrap().is_none());
+        assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap());
+        drop(rtxn);
+
+        // We set toto and age as searchable to force reordering of the fields
+        let mut wtxn = index.write_txn().unwrap();
+        let mut builder = Settings::new(&mut wtxn, &index);
+        builder.set_searchable_fields(vec!["toto".to_string(), "age".to_string()]);
+        builder.execute(|_| ()).unwrap();
+        wtxn.commit().unwrap();
+
+        let rtxn = index.read_txn().unwrap();
+        assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap());
+        assert!(index.primary_key(&rtxn).unwrap().is_none());
+        assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap());
+        drop(rtxn);
+    }
 }
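Reviewer sketch of the behavior the new `update_criteria` path enforces: a criterion over a field that was never declared faceted now fails at settings time (a hedged example; `index` setup as in the tests above, and "price" is a hypothetical field name):

```rust
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index);
// "price" was never declared in set_faceted_fields, so Criterion::from_str
// rejects it instead of silently allocating a field id as before.
builder.set_criteria(vec!["asc(price)".to_string()]);
assert!(builder.execute(|_| ()).is_err());
```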