mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-08 20:44:30 +01:00
Fix settings bug
replace ids with str in settings This allows for better maintainability of the settings code, since updating the searchable attributes is now straightforward. criterion use string fix reindexing fieldid remaping add tests for primary_key compute fix tests fix http-ui fixup! add tests for primary_key compute code improvements settings update deps fixup! code improvements settings fixup! refactor settings updates and fix bug fixup! Fix settings bug fixup! Fix settings bug fixup! Fix settings bug Update src/update/index_documents/transform.rs Co-authored-by: Clément Renault <clement@meilisearch.com> fixup! Fix settings bug
This commit is contained in:
parent
26f060f66b
commit
87a56d2bc9
481
Cargo.lock
generated
481
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
632
http-ui/Cargo.lock
generated
632
http-ui/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,3 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt::Display;
|
||||
use std::fs::{File, create_dir_all};
|
||||
@ -654,13 +653,13 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
let mut documents = Vec::new();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let displayed_fields = match index.displayed_fields(&rtxn).unwrap() {
|
||||
Some(fields) => Cow::Borrowed(fields),
|
||||
None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()),
|
||||
let displayed_fields = match index.displayed_fields_ids(&rtxn).unwrap() {
|
||||
Some(fields) => fields,
|
||||
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
|
||||
};
|
||||
let attributes_to_highlight = match index.searchable_fields(&rtxn).unwrap() {
|
||||
Some(fields) => fields.iter().flat_map(|id| fields_ids_map.name(*id)).map(ToOwned::to_owned).collect(),
|
||||
None => fields_ids_map.iter().map(|(_, name)| name).map(ToOwned::to_owned).collect(),
|
||||
Some(fields) => fields.into_iter().map(String::from).collect(),
|
||||
None => fields_ids_map.iter().map(|(_, name)| name).map(String::from).collect(),
|
||||
};
|
||||
|
||||
let stop_words = fst::Set::default();
|
||||
@ -690,9 +689,9 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
let external_documents_ids = index.external_documents_ids(&rtxn).unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let displayed_fields = match index.displayed_fields(&rtxn).unwrap() {
|
||||
Some(fields) => Cow::Borrowed(fields),
|
||||
None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()),
|
||||
let displayed_fields = match index.displayed_fields_ids(&rtxn).unwrap() {
|
||||
Some(fields) => fields,
|
||||
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
|
||||
};
|
||||
|
||||
match external_documents_ids.get(&id) {
|
||||
|
@ -1,10 +1,12 @@
|
||||
use crate::{FieldsIdsMap, FieldId};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use anyhow::{Context, bail};
|
||||
use regex::Regex;
|
||||
use serde::{Serialize, Deserialize};
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq, Eq)]
|
||||
use crate::facet::FacetType;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
|
||||
pub enum Criterion {
|
||||
/// Sorted by increasing number of typos.
|
||||
Typo,
|
||||
@ -21,13 +23,13 @@ pub enum Criterion {
|
||||
/// Sorted by the similarity of the matched words with the query words.
|
||||
Exactness,
|
||||
/// Sorted by the increasing value of the field specified.
|
||||
Asc(FieldId),
|
||||
Asc(String),
|
||||
/// Sorted by the decreasing value of the field specified.
|
||||
Desc(FieldId),
|
||||
Desc(String),
|
||||
}
|
||||
|
||||
impl Criterion {
|
||||
pub fn from_str(fields_ids_map: &mut FieldsIdsMap, txt: &str) -> anyhow::Result<Criterion> {
|
||||
pub fn from_str(faceted_attributes: &HashMap<String, FacetType>, txt: &str) -> anyhow::Result<Criterion> {
|
||||
match txt {
|
||||
"typo" => Ok(Criterion::Typo),
|
||||
"words" => Ok(Criterion::Words),
|
||||
@ -40,22 +42,15 @@ impl Criterion {
|
||||
let caps = re.captures(text).with_context(|| format!("unknown criterion name: {}", text))?;
|
||||
let order = caps.get(1).unwrap().as_str();
|
||||
let field_name = caps.get(2).unwrap().as_str();
|
||||
let field_id = fields_ids_map.insert(field_name).context("field id limit reached")?;
|
||||
faceted_attributes.get(field_name).with_context(|| format!("Can't use {:?} as a criterion as it isn't a faceted field.", field_name))?;
|
||||
match order {
|
||||
"asc" => Ok(Criterion::Asc(field_id)),
|
||||
"desc" => Ok(Criterion::Desc(field_id)),
|
||||
"asc" => Ok(Criterion::Asc(field_name.to_string())),
|
||||
"desc" => Ok(Criterion::Desc(field_name.to_string())),
|
||||
otherwise => bail!("unknown criterion name: {}", otherwise),
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn field_id(&self) -> Option<FieldId> {
|
||||
match *self {
|
||||
Criterion::Asc(fid) | Criterion::Desc(fid) => Some(fid),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn default_criteria() -> Vec<Criterion> {
|
||||
|
89
src/index.rs
89
src/index.rs
@ -112,8 +112,8 @@ impl Index {
|
||||
/* primary key */
|
||||
|
||||
/// Writes the documents primary key, this is the field name that is used to store the id.
|
||||
pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: FieldId) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, OwnedType<FieldId>>(wtxn, PRIMARY_KEY_KEY, &primary_key)
|
||||
pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, Str>(wtxn, PRIMARY_KEY_KEY, &primary_key)
|
||||
}
|
||||
|
||||
/// Deletes the primary key of the documents, this can be done to reset indexes settings.
|
||||
@ -122,8 +122,8 @@ impl Index {
|
||||
}
|
||||
|
||||
/// Returns the documents primary key, `None` if it hasn't been defined.
|
||||
pub fn primary_key(&self, rtxn: &RoTxn) -> heed::Result<Option<FieldId>> {
|
||||
self.main.get::<_, Str, OwnedType<FieldId>>(rtxn, PRIMARY_KEY_KEY)
|
||||
pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t str>> {
|
||||
self.main.get::<_, Str, Str>(rtxn, PRIMARY_KEY_KEY)
|
||||
}
|
||||
|
||||
/* external documents ids */
|
||||
@ -175,10 +175,10 @@ impl Index {
|
||||
|
||||
/* displayed fields */
|
||||
|
||||
/// Writes the fields ids that must be displayed in the defined order.
|
||||
/// Writes the fields that must be displayed in the defined order.
|
||||
/// There must be not be any duplicate field id.
|
||||
pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[FieldId]) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, ByteSlice>(wtxn, DISPLAYED_FIELDS_KEY, fields)
|
||||
pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, DISPLAYED_FIELDS_KEY, &fields)
|
||||
}
|
||||
|
||||
/// Deletes the displayed fields ids, this will make the engine to display
|
||||
@ -187,18 +187,27 @@ impl Index {
|
||||
self.main.delete::<_, Str>(wtxn, DISPLAYED_FIELDS_KEY)
|
||||
}
|
||||
|
||||
/// Returns the displayed fields ids in the order they must be returned. If it returns
|
||||
/// `None` it means that all the attributes are displayed in the order of the `FieldsIdsMap`.
|
||||
pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [FieldId]>> {
|
||||
self.main.get::<_, Str, ByteSlice>(rtxn, DISPLAYED_FIELDS_KEY)
|
||||
/// Returns the displayed fields in the order they were set by the user. If it returns
|
||||
/// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`.
|
||||
pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
|
||||
self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, DISPLAYED_FIELDS_KEY)
|
||||
}
|
||||
|
||||
pub fn displayed_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<Option<Vec<FieldId>>> {
|
||||
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||
let ids = self.displayed_fields(rtxn)?
|
||||
.map(|fields| fields
|
||||
.into_iter()
|
||||
.map(|name| fields_ids_map.id(name).expect("Field not found"))
|
||||
.collect::<Vec<_>>());
|
||||
Ok(ids)
|
||||
}
|
||||
|
||||
/* searchable fields */
|
||||
|
||||
/// Writes the searchable fields, when this list is specified, only these are indexed.
|
||||
pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[FieldId]) -> heed::Result<()> {
|
||||
assert!(fields.windows(2).all(|win| win[0] < win[1])); // is sorted
|
||||
self.main.put::<_, Str, ByteSlice>(wtxn, SEARCHABLE_FIELDS_KEY, fields)
|
||||
pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, SEARCHABLE_FIELDS_KEY, &fields)
|
||||
}
|
||||
|
||||
/// Deletes the searchable fields, when no fields are specified, all fields are indexed.
|
||||
@ -206,17 +215,36 @@ impl Index {
|
||||
self.main.delete::<_, Str>(wtxn, SEARCHABLE_FIELDS_KEY)
|
||||
}
|
||||
|
||||
/// Returns the searchable fields ids, those are the fields that are indexed,
|
||||
/// Returns the searchable fields, those are the fields that are indexed,
|
||||
/// if the searchable fields aren't there it means that **all** the fields are indexed.
|
||||
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [FieldId]>> {
|
||||
self.main.get::<_, Str, ByteSlice>(rtxn, SEARCHABLE_FIELDS_KEY)
|
||||
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
|
||||
self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, SEARCHABLE_FIELDS_KEY)
|
||||
}
|
||||
|
||||
/// Identical to `searchable_fields`, but returns the ids instead.
|
||||
pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<Option<Vec<FieldId>>> {
|
||||
match self.searchable_fields(rtxn)? {
|
||||
Some(names) => {
|
||||
let fields_map = self.fields_ids_map(rtxn)?;
|
||||
let mut ids = Vec::new();
|
||||
for name in names {
|
||||
let id = fields_map
|
||||
.id(name)
|
||||
.ok_or_else(|| format!("field id map must contain {:?}", name))
|
||||
.expect("corrupted data: ");
|
||||
ids.push(id);
|
||||
}
|
||||
Ok(Some(ids))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/* faceted fields */
|
||||
|
||||
/// Writes the facet fields ids associated with their facet type or `None` if
|
||||
/// Writes the facet fields associated with their facet type or `None` if
|
||||
/// the facet type is currently unknown.
|
||||
pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<FieldId, FacetType>) -> heed::Result<()> {
|
||||
pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<String, FacetType>) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields_types)
|
||||
}
|
||||
|
||||
@ -225,9 +253,26 @@ impl Index {
|
||||
self.main.delete::<_, Str>(wtxn, FACETED_FIELDS_KEY)
|
||||
}
|
||||
|
||||
/// Returns the facet fields ids associated with their facet type.
|
||||
pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result<HashMap<FieldId, FacetType>> {
|
||||
Ok(self.main.get::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY)?.unwrap_or_default())
|
||||
/// Returns the facet fields names associated with their facet type.
|
||||
pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, FacetType>> {
|
||||
Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FACETED_FIELDS_KEY)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Same as `faceted_fields`, but returns ids instead.
|
||||
pub fn faceted_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<HashMap<FieldId, FacetType>> {
|
||||
let faceted_fields = self.faceted_fields(rtxn)?;
|
||||
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||
let faceted_fields = faceted_fields
|
||||
.iter()
|
||||
.map(|(k, v)| {
|
||||
let kid = fields_ids_map
|
||||
.id(k)
|
||||
.ok_or_else(|| format!("{:?} should be present in the field id map", k))
|
||||
.expect("corrupted data: ");
|
||||
(kid, *v)
|
||||
})
|
||||
.collect();
|
||||
Ok(faceted_fields)
|
||||
}
|
||||
|
||||
/* faceted documents ids */
|
||||
|
@ -148,7 +148,7 @@ impl FacetCondition {
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
{
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let faceted_fields = index.faceted_fields(rtxn)?;
|
||||
let faceted_fields = index.faceted_fields_ids(rtxn)?;
|
||||
let lexed = FilterParser::parse(Rule::prgm, expression)?;
|
||||
FacetCondition::from_pairs(&fields_ids_map, &faceted_fields, lexed)
|
||||
}
|
||||
@ -552,15 +552,15 @@ mod tests {
|
||||
// Test that the facet condition is correctly generated.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "channel = ponce").unwrap();
|
||||
let expected = OperatorString(1, FacetStringOperator::equal("Ponce"));
|
||||
let expected = OperatorString(0, FacetStringOperator::equal("Ponce"));
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "channel != ponce").unwrap();
|
||||
let expected = OperatorString(1, FacetStringOperator::not_equal("ponce"));
|
||||
let expected = OperatorString(0, FacetStringOperator::not_equal("ponce"));
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap();
|
||||
let expected = OperatorString(1, FacetStringOperator::not_equal("ponce"));
|
||||
let expected = OperatorString(0, FacetStringOperator::not_equal("ponce"));
|
||||
assert_eq!(condition, expected);
|
||||
}
|
||||
|
||||
@ -581,13 +581,13 @@ mod tests {
|
||||
// Test that the facet condition is correctly generated.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap();
|
||||
let expected = OperatorI64(1, Between(22, 44));
|
||||
let expected = OperatorI64(0, Between(22, 44));
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap();
|
||||
let expected = Or(
|
||||
Box::new(OperatorI64(1, LowerThan(22))),
|
||||
Box::new(OperatorI64(1, GreaterThan(44))),
|
||||
Box::new(OperatorI64(0, LowerThan(22))),
|
||||
Box::new(OperatorI64(0, GreaterThan(44))),
|
||||
);
|
||||
assert_eq!(condition, expected);
|
||||
}
|
||||
|
@ -285,9 +285,13 @@ impl<'a> Search<'a> {
|
||||
}
|
||||
}).next();
|
||||
match result {
|
||||
Some((fid, is_ascending)) => {
|
||||
let faceted_fields = self.index.faceted_fields(self.rtxn)?;
|
||||
let ftype = *faceted_fields.get(&fid).context("unknown field id")?;
|
||||
Some((attr_name, is_ascending)) => {
|
||||
let field_id_map = self.index.fields_ids_map(self.rtxn)?;
|
||||
let fid = field_id_map.id(&attr_name).with_context(|| format!("unknown field: {:?}", attr_name))?;
|
||||
let faceted_fields = self.index.faceted_fields_ids(self.rtxn)?;
|
||||
let ftype = *faceted_fields.get(&fid)
|
||||
.with_context(|| format!("{:?} not found in the faceted fields.", attr_name))
|
||||
.expect("corrupted data: ");
|
||||
Some((fid, ftype, is_ascending))
|
||||
},
|
||||
None => None,
|
||||
|
@ -342,7 +342,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
||||
if heap.len() > limit { heap.pop(); }
|
||||
}
|
||||
|
||||
let faceted_fields = index.faceted_fields(rtxn)?;
|
||||
let faceted_fields = index.faceted_fields_ids(rtxn)?;
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
for (field_id, field_type) in faceted_fields {
|
||||
let facet_name = fields_ids_map.name(field_id).unwrap();
|
||||
@ -413,7 +413,7 @@ fn words_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, words: Vec<Strin
|
||||
|
||||
fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_name: String) -> anyhow::Result<()> {
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let faceted_fields = index.faceted_fields(&rtxn)?;
|
||||
let faceted_fields = index.faceted_fields_ids(&rtxn)?;
|
||||
|
||||
let field_id = fields_ids_map.id(&field_name)
|
||||
.with_context(|| format!("field {} not found", field_name))?;
|
||||
@ -451,7 +451,7 @@ fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_nam
|
||||
|
||||
fn facet_stats(index: &Index, rtxn: &heed::RoTxn, field_name: String) -> anyhow::Result<()> {
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let faceted_fields = index.faceted_fields(&rtxn)?;
|
||||
let faceted_fields = index.faceted_fields_ids(&rtxn)?;
|
||||
|
||||
let field_id = fields_ids_map.id(&field_name)
|
||||
.with_context(|| format!("field {} not found", field_name))?;
|
||||
|
@ -1,4 +1,3 @@
|
||||
use std::borrow::Cow;
|
||||
use std::io::{self, BufRead, Write};
|
||||
use std::iter::once;
|
||||
use std::path::PathBuf;
|
||||
@ -47,9 +46,9 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
let index = Index::new(options, &opt.database)?;
|
||||
let rtxn = index.read_txn()?;
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let displayed_fields = match index.displayed_fields(&rtxn)? {
|
||||
Some(fields) => Cow::Borrowed(fields),
|
||||
None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()),
|
||||
let displayed_fields = match index.displayed_fields_ids(&rtxn)? {
|
||||
Some(fields) => fields,
|
||||
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
|
||||
};
|
||||
|
||||
let stdin = io::stdin();
|
||||
|
@ -25,7 +25,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
||||
|
||||
// We retrieve the number of documents ids that we are deleting.
|
||||
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
||||
let faceted_fields = self.index.faceted_fields(self.wtxn)?;
|
||||
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
||||
|
||||
// We clean some of the main engine datastructures.
|
||||
self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
|
||||
|
@ -188,7 +188,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
drop(iter);
|
||||
|
||||
// Remove the documents ids from the faceted documents ids.
|
||||
let faceted_fields = self.index.faceted_fields(self.wtxn)?;
|
||||
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
||||
for (field_id, facet_type) in faceted_fields {
|
||||
let mut docids = self.index.faceted_documents_ids(self.wtxn, field_id)?;
|
||||
docids.difference_with(&self.documents_ids);
|
||||
|
@ -51,7 +51,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
||||
|
||||
pub fn execute(self) -> anyhow::Result<()> {
|
||||
// We get the faceted fields to be able to create the facet levels.
|
||||
let faceted_fields = self.index.faceted_fields(self.wtxn)?;
|
||||
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
||||
|
||||
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
|
||||
for (field_id, facet_type) in faceted_fields {
|
||||
|
@ -338,8 +338,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
FacetLevel0ValuesDocids,
|
||||
}
|
||||
|
||||
let faceted_fields = self.index.faceted_fields(self.wtxn)?;
|
||||
let searchable_fields: HashSet<_> = match self.index.searchable_fields(self.wtxn)? {
|
||||
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
||||
let searchable_fields: HashSet<_> = match self.index.searchable_fields_ids(self.wtxn)? {
|
||||
Some(fields) => fields.iter().copied().collect(),
|
||||
None => fields_ids_map.iter().map(|(id, _name)| id).collect(),
|
||||
};
|
||||
@ -485,7 +485,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||
|
||||
// We write the primary key field id into the main database
|
||||
self.index.put_primary_key(self.wtxn, primary_key)?;
|
||||
self.index.put_primary_key(self.wtxn, &primary_key)?;
|
||||
|
||||
// We write the external documents ids into the main database.
|
||||
self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
|
||||
|
@ -10,13 +10,15 @@ use log::info;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
use crate::{BEU32, MergeFn, Index, FieldId, FieldsIdsMap, ExternalDocumentsIds};
|
||||
use crate::{Index, BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId};
|
||||
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
|
||||
use super::merge_function::merge_two_obkvs;
|
||||
use super::{create_writer, create_sorter, IndexDocumentsMethod};
|
||||
|
||||
const DEFAULT_PRIMARY_KEY_NAME: &str = "id";
|
||||
|
||||
pub struct TransformOutput {
|
||||
pub primary_key: FieldId,
|
||||
pub primary_key: String,
|
||||
pub fields_ids_map: FieldsIdsMap,
|
||||
pub external_documents_ids: ExternalDocumentsIds<'static>,
|
||||
pub new_documents_ids: RoaringBitmap,
|
||||
@ -73,7 +75,6 @@ impl Transform<'_, '_> {
|
||||
{
|
||||
let mut fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
||||
let external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap();
|
||||
let primary_key = self.index.primary_key(self.rtxn)?;
|
||||
|
||||
// Deserialize the whole batch of documents in memory.
|
||||
let mut documents: Peekable<Box<dyn Iterator<Item=serde_json::Result<Map<String, Value>>>>> = if is_stream {
|
||||
@ -88,27 +89,15 @@ impl Transform<'_, '_> {
|
||||
};
|
||||
|
||||
// We extract the primary key from the first document in
|
||||
// the batch if it hasn't already been defined in the index.
|
||||
let primary_key = match primary_key {
|
||||
Some(primary_key) => primary_key,
|
||||
None => {
|
||||
// We ignore a potential error here as we can't early return it now,
|
||||
// the peek method gives us only a reference on the next item,
|
||||
// we will eventually return it in the iteration just after.
|
||||
let first = documents.peek().and_then(|r| r.as_ref().ok());
|
||||
match first.and_then(|doc| doc.keys().find(|k| k.contains("id"))) {
|
||||
Some(key) => fields_ids_map.insert(&key).context("field id limit reached")?,
|
||||
None => {
|
||||
if !self.autogenerate_docids {
|
||||
// If there is no primary key in the current document batch, we must
|
||||
// return an error and not automatically generate any document id.
|
||||
return Err(anyhow!("missing primary key"))
|
||||
}
|
||||
fields_ids_map.insert("id").context("field id limit reached")?
|
||||
},
|
||||
}
|
||||
},
|
||||
};
|
||||
// the batch if it hasn't already been defined in the index
|
||||
let first = documents.peek().and_then(|r| r.as_ref().ok());
|
||||
let alternative_name = first.and_then(|doc| doc.keys().find(|k| k.contains(DEFAULT_PRIMARY_KEY_NAME)).cloned());
|
||||
let (primary_key_id, primary_key) = compute_primary_key_pair(
|
||||
self.index.primary_key(self.rtxn)?,
|
||||
&mut fields_ids_map,
|
||||
alternative_name,
|
||||
self.autogenerate_docids
|
||||
)?;
|
||||
|
||||
if documents.peek().is_none() {
|
||||
return Ok(TransformOutput {
|
||||
@ -122,13 +111,6 @@ impl Transform<'_, '_> {
|
||||
});
|
||||
}
|
||||
|
||||
// Get the primary key field name now, this way we will
|
||||
// be able to get the value in the JSON Map document.
|
||||
let primary_key_name = fields_ids_map
|
||||
.name(primary_key)
|
||||
.expect("found the primary key name")
|
||||
.to_owned();
|
||||
|
||||
// We must choose the appropriate merge function for when two or more documents
|
||||
// with the same user id must be merged or fully replaced in the same batch.
|
||||
let merge_function = match self.index_documents_method {
|
||||
@ -170,7 +152,7 @@ impl Transform<'_, '_> {
|
||||
|
||||
// We retrieve the user id from the document based on the primary key name,
|
||||
// if the document id isn't present we generate a uuid.
|
||||
let external_id = match document.get(&primary_key_name) {
|
||||
let external_id = match document.get(&primary_key) {
|
||||
Some(value) => match value {
|
||||
Value::String(string) => Cow::Borrowed(string.as_str()),
|
||||
Value::Number(number) => Cow::Owned(number.to_string()),
|
||||
@ -196,7 +178,7 @@ impl Transform<'_, '_> {
|
||||
serde_json::to_writer(&mut json_buffer, value)?;
|
||||
writer.insert(field_id, &json_buffer)?;
|
||||
}
|
||||
else if field_id == primary_key {
|
||||
else if field_id == primary_key_id {
|
||||
// We validate the document id [a-zA-Z0-9\-_].
|
||||
let external_id = match validate_document_id(&external_id) {
|
||||
Some(valid) => valid,
|
||||
@ -240,42 +222,37 @@ impl Transform<'_, '_> {
|
||||
|
||||
let mut csv = csv::Reader::from_reader(reader);
|
||||
let headers = csv.headers()?;
|
||||
let primary_key = self.index.primary_key(self.rtxn)?;
|
||||
|
||||
// Generate the new fields ids based on the current fields ids and this CSV headers.
|
||||
let mut fields_ids = Vec::new();
|
||||
// Generate the new fields ids based on the current fields ids and this CSV headers.
|
||||
for (i, header) in headers.iter().enumerate() {
|
||||
let id = fields_ids_map.insert(header).context("field id limit reached)")?;
|
||||
fields_ids.push((id, i));
|
||||
}
|
||||
|
||||
// Extract the position of the primary key in the current headers, None if not found.
|
||||
let external_id_pos = match primary_key {
|
||||
let primary_key_pos = match self.index.primary_key(self.rtxn)? {
|
||||
Some(primary_key) => {
|
||||
// Te primary key have is known so we must find the position in the CSV headers.
|
||||
let name = fields_ids_map.name(primary_key).expect("found the primary key name");
|
||||
headers.iter().position(|h| h == name)
|
||||
// The primary key is known so we must find the position in the CSV headers.
|
||||
headers.iter().position(|h| h == primary_key)
|
||||
},
|
||||
None => headers.iter().position(|h| h.contains("id")),
|
||||
};
|
||||
|
||||
// Returns the field id in the fileds ids map, create an "id" field
|
||||
// Returns the field id in the fields ids map, create an "id" field
|
||||
// in case it is not in the current headers.
|
||||
let primary_key_field_id = match external_id_pos {
|
||||
Some(pos) => fields_ids_map.id(&headers[pos]).expect("found the primary key"),
|
||||
None => {
|
||||
if !self.autogenerate_docids {
|
||||
// If there is no primary key in the current document batch, we must
|
||||
// return an error and not automatically generate any document id.
|
||||
return Err(anyhow!("missing primary key"))
|
||||
}
|
||||
let field_id = fields_ids_map.insert("id").context("field id limit reached")?;
|
||||
// We make sure to add the primary key field id to the fields ids,
|
||||
// this way it is added to the obks.
|
||||
fields_ids.push((field_id, usize::max_value()));
|
||||
field_id
|
||||
},
|
||||
};
|
||||
let alternative_name = primary_key_pos.map(|pos| headers[pos].to_string());
|
||||
let (primary_key_id, _) = compute_primary_key_pair(
|
||||
self.index.primary_key(self.rtxn)?,
|
||||
&mut fields_ids_map,
|
||||
alternative_name,
|
||||
self.autogenerate_docids
|
||||
)?;
|
||||
|
||||
// The primary key field is not present in the header, so we need to create it.
|
||||
if primary_key_pos.is_none() {
|
||||
fields_ids.push((primary_key_id, usize::max_value()));
|
||||
}
|
||||
|
||||
// We sort the fields ids by the fields ids map id, this way we are sure to iterate over
|
||||
// the records fields in the fields ids map order and correctly generate the obkv.
|
||||
@ -310,7 +287,7 @@ impl Transform<'_, '_> {
|
||||
}
|
||||
|
||||
// We extract the user id if we know where it is or generate an UUID V4 otherwise.
|
||||
let external_id = match external_id_pos {
|
||||
let external_id = match primary_key_pos {
|
||||
Some(pos) => {
|
||||
let external_id = &record[pos];
|
||||
// We validate the document id [a-zA-Z0-9\-_].
|
||||
@ -326,7 +303,7 @@ impl Transform<'_, '_> {
|
||||
// we return the generated document id instead of the record field.
|
||||
let iter = fields_ids.iter()
|
||||
.map(|(fi, i)| {
|
||||
let field = if *fi == primary_key_field_id { external_id } else { &record[*i] };
|
||||
let field = if *fi == primary_key_id { external_id } else { &record[*i] };
|
||||
(fi, field)
|
||||
});
|
||||
|
||||
@ -349,9 +326,13 @@ impl Transform<'_, '_> {
|
||||
|
||||
// Now that we have a valid sorter that contains the user id and the obkv we
|
||||
// give it to the last transforming function which returns the TransformOutput.
|
||||
let primary_key_name = fields_ids_map
|
||||
.name(primary_key_id)
|
||||
.map(String::from)
|
||||
.expect("Primary key must be present in fields id map");
|
||||
self.output_from_sorter(
|
||||
sorter,
|
||||
primary_key_field_id,
|
||||
primary_key_name,
|
||||
fields_ids_map,
|
||||
documents_count,
|
||||
external_documents_ids,
|
||||
@ -365,7 +346,7 @@ impl Transform<'_, '_> {
|
||||
fn output_from_sorter<F>(
|
||||
self,
|
||||
sorter: grenad::Sorter<MergeFn>,
|
||||
primary_key: FieldId,
|
||||
primary_key: String,
|
||||
fields_ids_map: FieldsIdsMap,
|
||||
approximate_number_of_documents: usize,
|
||||
mut external_documents_ids: ExternalDocumentsIds<'_>,
|
||||
@ -477,11 +458,11 @@ impl Transform<'_, '_> {
|
||||
// TODO this can be done in parallel by using the rayon `ThreadPool`.
|
||||
pub fn remap_index_documents(
|
||||
self,
|
||||
primary_key: FieldId,
|
||||
fields_ids_map: FieldsIdsMap,
|
||||
primary_key: String,
|
||||
old_fields_ids_map: FieldsIdsMap,
|
||||
new_fields_ids_map: FieldsIdsMap,
|
||||
) -> anyhow::Result<TransformOutput>
|
||||
{
|
||||
let current_fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
||||
let external_documents_ids = self.index.external_documents_ids(self.rtxn)?;
|
||||
let documents_ids = self.index.documents_ids(self.rtxn)?;
|
||||
let documents_count = documents_ids.len() as usize;
|
||||
@ -499,8 +480,8 @@ impl Transform<'_, '_> {
|
||||
let mut obkv_writer = obkv::KvWriter::new(&mut obkv_buffer);
|
||||
|
||||
// We iterate over the new `FieldsIdsMap` ids in order and construct the new obkv.
|
||||
for (id, name) in fields_ids_map.iter() {
|
||||
if let Some(val) = current_fields_ids_map.id(name).and_then(|id| obkv.get(id)) {
|
||||
for (id, name) in new_fields_ids_map.iter() {
|
||||
if let Some(val) = old_fields_ids_map.id(name).and_then(|id| obkv.get(id)) {
|
||||
obkv_writer.insert(id, val)?;
|
||||
}
|
||||
}
|
||||
@ -516,7 +497,7 @@ impl Transform<'_, '_> {
|
||||
|
||||
Ok(TransformOutput {
|
||||
primary_key,
|
||||
fields_ids_map,
|
||||
fields_ids_map: new_fields_ids_map,
|
||||
external_documents_ids: external_documents_ids.into_static(),
|
||||
new_documents_ids: documents_ids,
|
||||
replaced_documents_ids: RoaringBitmap::default(),
|
||||
@ -526,6 +507,42 @@ impl Transform<'_, '_> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Given an optional primary key and an optional alternative name, returns the (field_id, attr_name)
|
||||
/// for the primary key according to the following rules:
|
||||
/// - if primary_key is `Some`, returns the id and the name, else
|
||||
/// - if alternative_name is Some, adds alternative to the fields_ids_map, and returns the pair, else
|
||||
/// - if autogenerate_docids is true, insert the default id value in the field ids map ("id") and
|
||||
/// returns the pair, else
|
||||
/// - returns an error.
|
||||
fn compute_primary_key_pair(
|
||||
primary_key: Option<&str>,
|
||||
fields_ids_map: &mut FieldsIdsMap,
|
||||
alternative_name: Option<String>,
|
||||
autogenerate_docids: bool,
|
||||
) -> anyhow::Result<(FieldId, String)> {
|
||||
match primary_key {
|
||||
Some(primary_key) => {
|
||||
let id = fields_ids_map.id(primary_key).expect("primary key must be present in the fields id map");
|
||||
Ok((id, primary_key.to_string()))
|
||||
}
|
||||
None => {
|
||||
let name = match alternative_name {
|
||||
Some(key) => key,
|
||||
None => {
|
||||
if !autogenerate_docids {
|
||||
// If there is no primary key in the current document batch, we must
|
||||
// return an error and not automatically generate any document id.
|
||||
anyhow::bail!("missing primary key")
|
||||
}
|
||||
DEFAULT_PRIMARY_KEY_NAME.to_string()
|
||||
},
|
||||
};
|
||||
let id = fields_ids_map.insert(&name).context("field id limit reached")?;
|
||||
Ok((id, name))
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Only the last value associated with an id is kept.
|
||||
fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
||||
obkvs.last().context("no last value").map(|last| last.clone().into_owned())
|
||||
@ -552,3 +569,73 @@ fn validate_document_id(document_id: &str) -> Option<&str> {
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
mod compute_primary_key {
|
||||
use super::compute_primary_key_pair;
|
||||
use super::FieldsIdsMap;
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn should_panic_primary_key_not_in_map() {
|
||||
let mut fields_map = FieldsIdsMap::new();
|
||||
let _result = compute_primary_key_pair(
|
||||
Some("toto"),
|
||||
&mut fields_map,
|
||||
None,
|
||||
false);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_return_primary_key_if_is_some() {
|
||||
let mut fields_map = FieldsIdsMap::new();
|
||||
fields_map.insert("toto").unwrap();
|
||||
let result = compute_primary_key_pair(
|
||||
Some("toto"),
|
||||
&mut fields_map,
|
||||
Some("tata".to_string()),
|
||||
false);
|
||||
assert_eq!(result.unwrap(), (0u8, "toto".to_string()));
|
||||
assert_eq!(fields_map.len(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_return_alternative_if_primary_is_none() {
|
||||
let mut fields_map = FieldsIdsMap::new();
|
||||
let result = compute_primary_key_pair(
|
||||
None,
|
||||
&mut fields_map,
|
||||
Some("tata".to_string()),
|
||||
false);
|
||||
assert_eq!(result.unwrap(), (0u8, "tata".to_string()));
|
||||
assert_eq!(fields_map.len(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_return_default_if_both_are_none() {
|
||||
let mut fields_map = FieldsIdsMap::new();
|
||||
let result = compute_primary_key_pair(
|
||||
None,
|
||||
&mut fields_map,
|
||||
None,
|
||||
true);
|
||||
assert_eq!(result.unwrap(), (0u8, "id".to_string()));
|
||||
assert_eq!(fields_map.len(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_return_err_if_both_are_none_and_recompute_is_false(){
|
||||
let mut fields_map = FieldsIdsMap::new();
|
||||
let result = compute_primary_key_pair(
|
||||
None,
|
||||
&mut fields_map,
|
||||
None,
|
||||
false);
|
||||
assert!(result.is_err());
|
||||
assert_eq!(fields_map.len(), 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,14 +1,16 @@
|
||||
use std::collections::HashMap;
|
||||
use std::str::FromStr;
|
||||
|
||||
use anyhow::{ensure, Context};
|
||||
use anyhow::Context;
|
||||
use grenad::CompressionType;
|
||||
use itertools::Itertools;
|
||||
use rayon::ThreadPool;
|
||||
|
||||
use crate::criterion::Criterion;
|
||||
use crate::facet::FacetType;
|
||||
use crate::update::index_documents::{Transform, IndexDocumentsMethod};
|
||||
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
|
||||
use crate::facet::FacetType;
|
||||
use crate::{Index, FieldsIdsMap, Criterion};
|
||||
use crate::{Index, FieldsIdsMap};
|
||||
|
||||
pub struct Settings<'a, 't, 'u, 'i> {
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
@ -26,7 +28,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
||||
// however if it is `Some(None)` it means that the user forced a reset of the setting.
|
||||
searchable_fields: Option<Option<Vec<String>>>,
|
||||
displayed_fields: Option<Option<Vec<String>>>,
|
||||
faceted_fields: Option<HashMap<String, String>>,
|
||||
faceted_fields: Option<Option<HashMap<String, String>>>,
|
||||
criteria: Option<Option<Vec<String>>>,
|
||||
}
|
||||
|
||||
@ -67,7 +69,11 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
}
|
||||
|
||||
pub fn set_faceted_fields(&mut self, names_facet_types: HashMap<String, String>) {
|
||||
self.faceted_fields = Some(names_facet_types);
|
||||
self.faceted_fields = Some(Some(names_facet_types));
|
||||
}
|
||||
|
||||
pub fn reset_faceted_fields(&mut self) {
|
||||
self.faceted_fields = Some(None);
|
||||
}
|
||||
|
||||
pub fn reset_criteria(&mut self) {
|
||||
@ -78,183 +84,188 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
self.criteria = Some(Some(criteria));
|
||||
}
|
||||
|
||||
pub fn execute<F>(self, progress_callback: F) -> anyhow::Result<()>
|
||||
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()>
|
||||
where
|
||||
F: Fn(UpdateIndexingStep) + Sync
|
||||
F: Fn(UpdateIndexingStep) + Sync,
|
||||
{
|
||||
let mut updated_searchable_fields = None;
|
||||
let mut updated_faceted_fields = None;
|
||||
let mut updated_displayed_fields = None;
|
||||
let mut updated_criteria = None;
|
||||
|
||||
// Construct the new FieldsIdsMap based on the searchable fields order.
|
||||
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
let mut fields_ids_map = match self.searchable_fields {
|
||||
Some(Some(searchable_fields)) => {
|
||||
let mut new_fields_ids_map = FieldsIdsMap::new();
|
||||
let mut new_searchable_fields = Vec::new();
|
||||
// if the settings are set before any document update, we don't need to do anything, and
|
||||
// will set the primary key during the first document addition.
|
||||
if self.index.number_of_documents(&self.wtxn)? == 0 {
|
||||
return Ok(())
|
||||
}
|
||||
|
||||
for name in searchable_fields {
|
||||
let id = new_fields_ids_map.insert(&name).context("field id limit reached")?;
|
||||
new_searchable_fields.push(id);
|
||||
}
|
||||
|
||||
for (_, name) in fields_ids_map.iter() {
|
||||
new_fields_ids_map.insert(name).context("field id limit reached")?;
|
||||
}
|
||||
|
||||
updated_searchable_fields = Some(Some(new_searchable_fields));
|
||||
new_fields_ids_map
|
||||
},
|
||||
Some(None) => {
|
||||
updated_searchable_fields = Some(None);
|
||||
fields_ids_map
|
||||
},
|
||||
None => fields_ids_map,
|
||||
let transform = Transform {
|
||||
rtxn: &self.wtxn,
|
||||
index: self.index,
|
||||
log_every_n: self.log_every_n,
|
||||
chunk_compression_type: self.chunk_compression_type,
|
||||
chunk_compression_level: self.chunk_compression_level,
|
||||
chunk_fusing_shrink_size: self.chunk_fusing_shrink_size,
|
||||
max_nb_chunks: self.max_nb_chunks,
|
||||
max_memory: self.max_memory,
|
||||
index_documents_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
autogenerate_docids: false,
|
||||
};
|
||||
|
||||
// We compute or generate the new primary key field id.
|
||||
// TODO make the primary key settable.
|
||||
let primary_key = match self.index.primary_key(&self.wtxn)? {
|
||||
Some(id) => {
|
||||
let current_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
let name = current_fields_ids_map.name(id).unwrap();
|
||||
fields_ids_map.insert(name).context("field id limit reached")?
|
||||
},
|
||||
None => fields_ids_map.insert("id").context("field id limit reached")?,
|
||||
};
|
||||
// There already has been a document addition, the primary key should be set by now.
|
||||
let primary_key = self.index.primary_key(&self.wtxn)?.context("Index must have a primary key")?;
|
||||
|
||||
let current_faceted_fields = self.index.faceted_fields(self.wtxn)?;
|
||||
if let Some(fields_names_facet_types) = self.faceted_fields {
|
||||
let mut faceted_fields = HashMap::new();
|
||||
for (name, sftype) in fields_names_facet_types {
|
||||
let ftype = FacetType::from_str(&sftype).with_context(|| format!("parsing facet type {:?}", sftype))?;
|
||||
let id = fields_ids_map.insert(&name).context("field id limit reached")?;
|
||||
match current_faceted_fields.get(&id) {
|
||||
Some(pftype) => {
|
||||
ensure!(ftype == *pftype, "{} facet type changed from {} to {}", name, ftype, pftype);
|
||||
faceted_fields.insert(id, ftype)
|
||||
},
|
||||
None => faceted_fields.insert(id, ftype),
|
||||
};
|
||||
}
|
||||
// We remap the documents fields based on the new `FieldsIdsMap`.
|
||||
let output = transform.remap_index_documents(
|
||||
primary_key.to_string(),
|
||||
old_fields_ids_map,
|
||||
fields_ids_map.clone())?;
|
||||
|
||||
updated_faceted_fields = Some(faceted_fields);
|
||||
}
|
||||
|
||||
// Check that the displayed attributes have been specified.
|
||||
if let Some(value) = self.displayed_fields {
|
||||
match value {
|
||||
Some(names) => {
|
||||
let mut new_displayed_fields = Vec::new();
|
||||
for name in names {
|
||||
let id = fields_ids_map.insert(&name).context("field id limit reached")?;
|
||||
new_displayed_fields.push(id);
|
||||
}
|
||||
updated_displayed_fields = Some(Some(new_displayed_fields));
|
||||
}
|
||||
None => updated_displayed_fields = Some(None),
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(criteria) = self.criteria {
|
||||
match criteria {
|
||||
Some(criteria_names) => {
|
||||
let mut new_criteria = Vec::new();
|
||||
for name in criteria_names {
|
||||
let criterion = Criterion::from_str(&mut fields_ids_map, &name)?;
|
||||
if let Some(fid) = criterion.field_id() {
|
||||
let name = fields_ids_map.name(fid).unwrap();
|
||||
let faceted_fields = updated_faceted_fields.as_ref().unwrap_or(¤t_faceted_fields);
|
||||
ensure!(faceted_fields.contains_key(&fid), "criterion field {} must be faceted", name);
|
||||
}
|
||||
new_criteria.push(criterion);
|
||||
}
|
||||
updated_criteria = Some(Some(new_criteria));
|
||||
},
|
||||
None => updated_criteria = Some(None),
|
||||
}
|
||||
}
|
||||
|
||||
// If any setting have modified any of the datastructures it means that we need
|
||||
// to retrieve the documents and then reindex then with the new settings.
|
||||
if updated_searchable_fields.is_some() || updated_faceted_fields.is_some() {
|
||||
let transform = Transform {
|
||||
rtxn: &self.wtxn,
|
||||
index: self.index,
|
||||
log_every_n: self.log_every_n,
|
||||
chunk_compression_type: self.chunk_compression_type,
|
||||
chunk_compression_level: self.chunk_compression_level,
|
||||
chunk_fusing_shrink_size: self.chunk_fusing_shrink_size,
|
||||
max_nb_chunks: self.max_nb_chunks,
|
||||
max_memory: self.max_memory,
|
||||
index_documents_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
autogenerate_docids: false,
|
||||
};
|
||||
|
||||
// We remap the documents fields based on the new `FieldsIdsMap`.
|
||||
let output = transform.remap_index_documents(primary_key, fields_ids_map.clone())?;
|
||||
|
||||
// We write the new FieldsIdsMap to the database
|
||||
// this way next indexing methods will be based on that.
|
||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||
|
||||
if let Some(faceted_fields) = updated_faceted_fields {
|
||||
// We write the faceted_fields fields into the database here.
|
||||
self.index.put_faceted_fields(self.wtxn, &faceted_fields)?;
|
||||
}
|
||||
|
||||
if let Some(searchable_fields) = updated_searchable_fields {
|
||||
// The new searchable fields are also written down to make sure
|
||||
// that the IndexDocuments system takes only these ones into account.
|
||||
match searchable_fields {
|
||||
Some(fields) => self.index.put_searchable_fields(self.wtxn, &fields)?,
|
||||
None => self.index.delete_searchable_fields(self.wtxn).map(drop)?,
|
||||
}
|
||||
}
|
||||
|
||||
// We clear the full database (words-fst, documents ids and documents content).
|
||||
ClearDocuments::new(self.wtxn, self.index).execute()?;
|
||||
|
||||
// We index the generated `TransformOutput` which must contain
|
||||
// all the documents with fields in the newly defined searchable order.
|
||||
let mut indexing_builder = IndexDocuments::new(self.wtxn, self.index);
|
||||
indexing_builder.log_every_n = self.log_every_n;
|
||||
indexing_builder.max_nb_chunks = self.max_nb_chunks;
|
||||
indexing_builder.max_memory = self.max_memory;
|
||||
indexing_builder.linked_hash_map_size = self.linked_hash_map_size;
|
||||
indexing_builder.chunk_compression_type = self.chunk_compression_type;
|
||||
indexing_builder.chunk_compression_level = self.chunk_compression_level;
|
||||
indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
|
||||
indexing_builder.thread_pool = self.thread_pool;
|
||||
indexing_builder.execute_raw(output, &progress_callback)?;
|
||||
}
|
||||
|
||||
if let Some(displayed_fields) = updated_displayed_fields {
|
||||
match displayed_fields {
|
||||
Some(fields) => self.index.put_displayed_fields(self.wtxn, &fields)?,
|
||||
None => self.index.delete_displayed_fields(self.wtxn).map(drop)?,
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(criteria) = updated_criteria {
|
||||
match criteria {
|
||||
Some(criteria) => self.index.put_criteria(self.wtxn, &criteria)?,
|
||||
None => self.index.delete_criteria(self.wtxn).map(drop)?,
|
||||
}
|
||||
}
|
||||
// We clear the full database (words-fst, documents ids and documents content).
|
||||
ClearDocuments::new(self.wtxn, self.index).execute()?;
|
||||
|
||||
// We index the generated `TransformOutput` which must contain
|
||||
// all the documents with fields in the newly defined searchable order.
|
||||
let mut indexing_builder = IndexDocuments::new(self.wtxn, self.index);
|
||||
indexing_builder.log_every_n = self.log_every_n;
|
||||
indexing_builder.max_nb_chunks = self.max_nb_chunks;
|
||||
indexing_builder.max_memory = self.max_memory;
|
||||
indexing_builder.linked_hash_map_size = self.linked_hash_map_size;
|
||||
indexing_builder.chunk_compression_type = self.chunk_compression_type;
|
||||
indexing_builder.chunk_compression_level = self.chunk_compression_level;
|
||||
indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
|
||||
indexing_builder.thread_pool = self.thread_pool;
|
||||
indexing_builder.execute_raw(output, &cb)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn update_displayed(&mut self) -> anyhow::Result<bool> {
|
||||
match self.displayed_fields {
|
||||
Some(Some(ref fields)) => {
|
||||
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
// fields are deduplicated, only the first occurrence is taken into account
|
||||
let names: Vec<_> = fields
|
||||
.iter()
|
||||
.unique()
|
||||
.map(String::as_str)
|
||||
.collect();
|
||||
|
||||
for name in names.iter() {
|
||||
fields_ids_map
|
||||
.insert(name)
|
||||
.context("field id limit exceeded")?;
|
||||
}
|
||||
self.index.put_displayed_fields(self.wtxn, &names)?;
|
||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||
}
|
||||
Some(None) => { self.index.delete_displayed_fields(self.wtxn)?; },
|
||||
None => return Ok(false),
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
/// Udpates the index's searchable attributes. This causes the field map to be recomputed to
|
||||
/// reflect the order of the searchable attributes.
|
||||
fn update_searchable(&mut self) -> anyhow::Result<bool> {
|
||||
match self.searchable_fields {
|
||||
Some(Some(ref fields)) => {
|
||||
// every time the searchable attributes are updated, we need to update the
|
||||
// ids for any settings that uses the facets. (displayed_fields,
|
||||
// faceted_fields)
|
||||
let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
|
||||
let mut new_fields_ids_map = FieldsIdsMap::new();
|
||||
// fields are deduplicated, only the first occurrence is taken into account
|
||||
let names = fields
|
||||
.iter()
|
||||
.unique()
|
||||
.map(String::as_str)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// Add all the searchable attributes to the field map, and then add the
|
||||
// remaining fields from the old field map to the new one
|
||||
for name in names.iter() {
|
||||
new_fields_ids_map
|
||||
.insert(&name)
|
||||
.context("field id limit exceeded")?;
|
||||
}
|
||||
|
||||
for (_, name) in old_fields_ids_map.iter() {
|
||||
new_fields_ids_map
|
||||
.insert(&name)
|
||||
.context("field id limit exceeded")?;
|
||||
}
|
||||
|
||||
self.index.put_searchable_fields(self.wtxn, &names)?;
|
||||
self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?;
|
||||
}
|
||||
Some(None) => { self.index.delete_searchable_fields(self.wtxn)?; },
|
||||
None => return Ok(false),
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn update_facets(&mut self) -> anyhow::Result<bool> {
|
||||
match self.faceted_fields {
|
||||
Some(Some(ref fields)) => {
|
||||
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
let mut new_facets = HashMap::new();
|
||||
for (name, ty) in fields {
|
||||
fields_ids_map.insert(name).context("field id limit exceeded")?;
|
||||
let ty = FacetType::from_str(&ty)?;
|
||||
new_facets.insert(name.clone(), ty);
|
||||
}
|
||||
self.index.put_faceted_fields(self.wtxn, &new_facets)?;
|
||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||
}
|
||||
Some(None) => { self.index.delete_faceted_fields(self.wtxn)?; },
|
||||
None => return Ok(false)
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn update_criteria(&mut self) -> anyhow::Result<()> {
|
||||
match self.criteria {
|
||||
Some(Some(ref fields)) => {
|
||||
let faceted_fields = self.index.faceted_fields(&self.wtxn)?;
|
||||
let mut new_criteria = Vec::new();
|
||||
for name in fields {
|
||||
let criterion = Criterion::from_str(&faceted_fields, &name)?;
|
||||
new_criteria.push(criterion);
|
||||
}
|
||||
self.index.put_criteria(self.wtxn, &new_criteria)?;
|
||||
}
|
||||
Some(None) => { self.index.delete_criteria(self.wtxn)?; }
|
||||
None => (),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn execute<F>(mut self, progress_callback: F) -> anyhow::Result<()>
|
||||
where
|
||||
F: Fn(UpdateIndexingStep) + Sync
|
||||
{
|
||||
let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?;
|
||||
self.update_displayed()?;
|
||||
let facets_updated = self.update_facets()?;
|
||||
// update_criteria MUST be called after update_facets, since criterion fields must be set
|
||||
// as facets.
|
||||
self.update_criteria()?;
|
||||
let searchable_updated = self.update_searchable()?;
|
||||
|
||||
if facets_updated || searchable_updated {
|
||||
self.reindex(&progress_callback, old_fields_ids_map)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::update::{IndexDocuments, UpdateFormat};
|
||||
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::hashmap;
|
||||
|
||||
use crate::facet::FacetType;
|
||||
use crate::update::{IndexDocuments, UpdateFormat};
|
||||
|
||||
#[test]
|
||||
fn set_and_reset_searchable_fields() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
@ -336,10 +347,8 @@ mod tests {
|
||||
|
||||
// Check that the displayed fields are correctly set to `None` (default value).
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let fields_ids = index.displayed_fields(&rtxn).unwrap();
|
||||
let age_id = fields_ids_map.id("age").unwrap();
|
||||
assert_eq!(fields_ids, Some(&[age_id][..]));
|
||||
assert_eq!(fields_ids.unwrap(), (&["age"][..]));
|
||||
drop(rtxn);
|
||||
|
||||
// We change the searchable fields to be the "name" field only.
|
||||
@ -351,10 +360,8 @@ mod tests {
|
||||
|
||||
// Check that the displayed fields always contains only the "age" field.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let fields_ids = index.displayed_fields(&rtxn).unwrap();
|
||||
let age_id = fields_ids_map.id("age").unwrap();
|
||||
assert_eq!(fields_ids, Some(&[age_id][..]));
|
||||
assert_eq!(fields_ids.unwrap(), &["age"][..]);
|
||||
drop(rtxn);
|
||||
}
|
||||
|
||||
@ -402,10 +409,8 @@ mod tests {
|
||||
|
||||
// Check that the displayed fields are correctly set to only the "age" field.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let age_field_id = fields_ids_map.id("age").unwrap();
|
||||
let fields_ids = index.displayed_fields(&rtxn).unwrap();
|
||||
assert_eq!(fields_ids.unwrap(), &[age_field_id][..]);
|
||||
assert_eq!(fields_ids.unwrap(), &["age"][..]);
|
||||
drop(rtxn);
|
||||
|
||||
// We reset the fields ids to become `None`, the default value.
|
||||
@ -445,9 +450,9 @@ mod tests {
|
||||
// Check that the displayed fields are correctly set.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let fields_ids = index.faceted_fields(&rtxn).unwrap();
|
||||
assert_eq!(fields_ids, hashmap!{ 1 => FacetType::Integer });
|
||||
assert_eq!(fields_ids, hashmap!{ "age".to_string() => FacetType::Integer });
|
||||
// Only count the field_id 0 and level 0 facet values.
|
||||
let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[1, 0]).unwrap().count();
|
||||
let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count();
|
||||
assert_eq!(count, 3);
|
||||
drop(rtxn);
|
||||
|
||||
@ -461,8 +466,49 @@ mod tests {
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
// Only count the field_id 0 and level 0 facet values.
|
||||
let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[1, 0]).unwrap().count();
|
||||
let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count();
|
||||
assert_eq!(count, 4);
|
||||
drop(rtxn);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn setting_searchable_recomputes_other_settings() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Set all the settings except searchable
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index);
|
||||
builder.set_displayed_fields(vec!["hello".to_string()]);
|
||||
builder.set_faceted_fields(hashmap!{
|
||||
"age".into() => "integer".into(),
|
||||
"toto".into() => "integer".into(),
|
||||
});
|
||||
builder.set_criteria(vec!["asc(toto)".to_string()]);
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// check the output
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap());
|
||||
// since no documents have been pushed the primary key is still unset
|
||||
assert!(index.primary_key(&rtxn).unwrap().is_none());
|
||||
assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap());
|
||||
drop(rtxn);
|
||||
|
||||
// We set toto and age as searchable to force reordering of the fields
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index);
|
||||
builder.set_searchable_fields(vec!["toto".to_string(), "age".to_string()]);
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap());
|
||||
assert!(index.primary_key(&rtxn).unwrap().is_none());
|
||||
assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap());
|
||||
drop(rtxn);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user