Mirror of https://github.com/meilisearch/MeiliSearch, synced 2024-11-05 12:38:55 +01:00

Merge pull request #67 from meilisearch/fix-settings

Fix displayed and searchable attributes

Commit 30dae0205e
Cargo.lock (generated, 481 changes): file diff suppressed because it is too large
http-ui/Cargo.lock (generated, 632 changes): file diff suppressed because it is too large
@@ -1,4 +1,3 @@
-use std::borrow::Cow;
 use std::collections::{HashMap, HashSet};
 use std::fmt::Display;
 use std::fs::{File, create_dir_all};
@@ -654,13 +653,13 @@ async fn main() -> anyhow::Result<()> {
 
     let mut documents = Vec::new();
     let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
-    let displayed_fields = match index.displayed_fields(&rtxn).unwrap() {
-        Some(fields) => Cow::Borrowed(fields),
-        None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()),
+    let displayed_fields = match index.displayed_fields_ids(&rtxn).unwrap() {
+        Some(fields) => fields,
+        None => fields_ids_map.iter().map(|(id, _)| id).collect(),
     };
     let attributes_to_highlight = match index.searchable_fields(&rtxn).unwrap() {
-        Some(fields) => fields.iter().flat_map(|id| fields_ids_map.name(*id)).map(ToOwned::to_owned).collect(),
-        None => fields_ids_map.iter().map(|(_, name)| name).map(ToOwned::to_owned).collect(),
+        Some(fields) => fields.into_iter().map(String::from).collect(),
+        None => fields_ids_map.iter().map(|(_, name)| name).map(String::from).collect(),
     };
 
     let stop_words = fst::Set::default();
@@ -690,9 +689,9 @@ async fn main() -> anyhow::Result<()> {
 
     let external_documents_ids = index.external_documents_ids(&rtxn).unwrap();
     let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
-    let displayed_fields = match index.displayed_fields(&rtxn).unwrap() {
-        Some(fields) => Cow::Borrowed(fields),
-        None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()),
+    let displayed_fields = match index.displayed_fields_ids(&rtxn).unwrap() {
+        Some(fields) => fields,
+        None => fields_ids_map.iter().map(|(id, _)| id).collect(),
     };
 
     match external_documents_ids.get(&id) {
@@ -1,10 +1,12 @@
-use crate::{FieldsIdsMap, FieldId};
+use std::collections::HashMap;
 
 use anyhow::{Context, bail};
 use regex::Regex;
 use serde::{Serialize, Deserialize};
 
-#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq, Eq)]
+use crate::facet::FacetType;
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
 pub enum Criterion {
     /// Sorted by increasing number of typos.
     Typo,
@@ -21,13 +23,13 @@ pub enum Criterion {
     /// Sorted by the similarity of the matched words with the query words.
     Exactness,
     /// Sorted by the increasing value of the field specified.
-    Asc(FieldId),
+    Asc(String),
     /// Sorted by the decreasing value of the field specified.
-    Desc(FieldId),
+    Desc(String),
 }
 
 impl Criterion {
-    pub fn from_str(fields_ids_map: &mut FieldsIdsMap, txt: &str) -> anyhow::Result<Criterion> {
+    pub fn from_str(faceted_attributes: &HashMap<String, FacetType>, txt: &str) -> anyhow::Result<Criterion> {
         match txt {
             "typo" => Ok(Criterion::Typo),
             "words" => Ok(Criterion::Words),
@@ -40,22 +42,15 @@ impl Criterion {
                 let caps = re.captures(text).with_context(|| format!("unknown criterion name: {}", text))?;
                 let order = caps.get(1).unwrap().as_str();
                 let field_name = caps.get(2).unwrap().as_str();
-                let field_id = fields_ids_map.insert(field_name).context("field id limit reached")?;
+                faceted_attributes.get(field_name).with_context(|| format!("Can't use {:?} as a criterion as it isn't a faceted field.", field_name))?;
                 match order {
-                    "asc" => Ok(Criterion::Asc(field_id)),
-                    "desc" => Ok(Criterion::Desc(field_id)),
+                    "asc" => Ok(Criterion::Asc(field_name.to_string())),
+                    "desc" => Ok(Criterion::Desc(field_name.to_string())),
                     otherwise => bail!("unknown criterion name: {}", otherwise),
                 }
             },
         }
     }
-
-    pub fn field_id(&self) -> Option<FieldId> {
-        match *self {
-            Criterion::Asc(fid) | Criterion::Desc(fid) => Some(fid),
-            _ => None,
-        }
-    }
 }
 
 pub fn default_criteria() -> Vec<Criterion> {
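As a hedged, standalone sketch of the rule this hunk introduces: a sort criterion now carries the attribute name and is only accepted when that name is a declared facet. The regex is replaced here by simple string splitting, and FacetType and the error strings are simplified stand-ins for the crate's own:

use std::collections::HashMap;

#[derive(Debug, Clone, Copy, PartialEq)]
enum FacetType { Integer }

#[derive(Debug, PartialEq)]
enum Criterion { Asc(String), Desc(String) }

fn parse_sort_criterion(faceted: &HashMap<String, FacetType>, txt: &str) -> Result<Criterion, String> {
    // Split "asc(price)" into ("asc", "price"); the real code uses a regex.
    let (order, name) = txt
        .strip_suffix(')')
        .and_then(|t| t.split_once('('))
        .ok_or_else(|| format!("unknown criterion name: {}", txt))?;
    // The new check: the field must already be faceted, no field id is allocated.
    if !faceted.contains_key(name) {
        return Err(format!("Can't use {:?} as a criterion as it isn't a faceted field.", name));
    }
    match order {
        "asc" => Ok(Criterion::Asc(name.to_string())),
        "desc" => Ok(Criterion::Desc(name.to_string())),
        otherwise => Err(format!("unknown criterion name: {}", otherwise)),
    }
}

fn main() {
    let faceted: HashMap<String, FacetType> =
        [("price".to_string(), FacetType::Integer)].into_iter().collect();
    assert_eq!(parse_sort_criterion(&faceted, "asc(price)"), Ok(Criterion::Asc("price".to_string())));
    assert!(parse_sort_criterion(&faceted, "asc(title)").is_err());
}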
src/index.rs (89 changes)
@@ -112,8 +112,8 @@ impl Index {
     /* primary key */
 
     /// Writes the documents primary key, this is the field name that is used to store the id.
-    pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: FieldId) -> heed::Result<()> {
-        self.main.put::<_, Str, OwnedType<FieldId>>(wtxn, PRIMARY_KEY_KEY, &primary_key)
+    pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> {
+        self.main.put::<_, Str, Str>(wtxn, PRIMARY_KEY_KEY, &primary_key)
     }
 
     /// Deletes the primary key of the documents, this can be done to reset indexes settings.
@@ -122,8 +122,8 @@ impl Index {
     }
 
     /// Returns the documents primary key, `None` if it hasn't been defined.
-    pub fn primary_key(&self, rtxn: &RoTxn) -> heed::Result<Option<FieldId>> {
-        self.main.get::<_, Str, OwnedType<FieldId>>(rtxn, PRIMARY_KEY_KEY)
+    pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t str>> {
+        self.main.get::<_, Str, Str>(rtxn, PRIMARY_KEY_KEY)
    }
 
     /* external documents ids */
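A hedged usage sketch of the reworked primary-key storage (the index and transaction are assumed to be set up as in the tests later in this diff): the key is now persisted as a plain string, so it survives any renumbering of the fields ids map.

use crate::Index;

fn example(index: &Index, wtxn: &mut heed::RwTxn) -> anyhow::Result<()> {
    index.put_primary_key(wtxn, "id")?;
    // RwTxn derefs to RoTxn, so the same transaction can read the value back,
    // exactly as the settings code below does with `&self.wtxn`.
    assert_eq!(index.primary_key(wtxn)?, Some("id"));
    Ok(())
}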
@@ -175,10 +175,10 @@ impl Index {
 
     /* displayed fields */
 
-    /// Writes the fields ids that must be displayed in the defined order.
+    /// Writes the fields that must be displayed in the defined order.
     /// There must be not be any duplicate field id.
-    pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[FieldId]) -> heed::Result<()> {
-        self.main.put::<_, Str, ByteSlice>(wtxn, DISPLAYED_FIELDS_KEY, fields)
+    pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
+        self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, DISPLAYED_FIELDS_KEY, &fields)
     }
 
     /// Deletes the displayed fields ids, this will make the engine to display
@@ -187,18 +187,27 @@ impl Index {
         self.main.delete::<_, Str>(wtxn, DISPLAYED_FIELDS_KEY)
     }
 
-    /// Returns the displayed fields ids in the order they must be returned. If it returns
-    /// `None` it means that all the attributes are displayed in the order of the `FieldsIdsMap`.
-    pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [FieldId]>> {
-        self.main.get::<_, Str, ByteSlice>(rtxn, DISPLAYED_FIELDS_KEY)
+    /// Returns the displayed fields in the order they were set by the user. If it returns
+    /// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`.
+    pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
+        self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, DISPLAYED_FIELDS_KEY)
     }
+
+    pub fn displayed_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<Option<Vec<FieldId>>> {
+        let fields_ids_map = self.fields_ids_map(rtxn)?;
+        let ids = self.displayed_fields(rtxn)?
+            .map(|fields| fields
+                .into_iter()
+                .map(|name| fields_ids_map.id(name).expect("Field not found"))
+                .collect::<Vec<_>>());
+        Ok(ids)
+    }
 
     /* searchable fields */
 
     /// Writes the searchable fields, when this list is specified, only these are indexed.
-    pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[FieldId]) -> heed::Result<()> {
-        assert!(fields.windows(2).all(|win| win[0] < win[1])); // is sorted
-        self.main.put::<_, Str, ByteSlice>(wtxn, SEARCHABLE_FIELDS_KEY, fields)
+    pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
+        self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, SEARCHABLE_FIELDS_KEY, &fields)
     }
 
     /// Deletes the searchable fields, when no fields are specified, all fields are indexed.
@@ -206,17 +215,36 @@ impl Index {
         self.main.delete::<_, Str>(wtxn, SEARCHABLE_FIELDS_KEY)
     }
 
-    /// Returns the searchable fields ids, those are the fields that are indexed,
+    /// Returns the searchable fields, those are the fields that are indexed,
     /// if the searchable fields aren't there it means that **all** the fields are indexed.
-    pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [FieldId]>> {
-        self.main.get::<_, Str, ByteSlice>(rtxn, SEARCHABLE_FIELDS_KEY)
+    pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
+        self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, SEARCHABLE_FIELDS_KEY)
     }
+
+    /// Identical to `searchable_fields`, but returns the ids instead.
+    pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<Option<Vec<FieldId>>> {
+        match self.searchable_fields(rtxn)? {
+            Some(names) => {
+                let fields_map = self.fields_ids_map(rtxn)?;
+                let mut ids = Vec::new();
+                for name in names {
+                    let id = fields_map
+                        .id(name)
+                        .ok_or_else(|| format!("field id map must contain {:?}", name))
+                        .expect("corrupted data: ");
+                    ids.push(id);
+                }
+                Ok(Some(ids))
+            }
+            None => Ok(None),
+        }
+    }
 
     /* faceted fields */
 
-    /// Writes the facet fields ids associated with their facet type or `None` if
+    /// Writes the facet fields associated with their facet type or `None` if
     /// the facet type is currently unknown.
-    pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<FieldId, FacetType>) -> heed::Result<()> {
+    pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<String, FacetType>) -> heed::Result<()> {
         self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields_types)
     }
 
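Another hedged usage sketch, with invented field names: searchable fields are now written by name, which is why the sorted-ids assert! above could be dropped, and ids are re-derived on demand through the fields ids map.

use crate::Index;

fn example(index: &Index, wtxn: &mut heed::RwTxn) -> anyhow::Result<()> {
    index.put_searchable_fields(wtxn, &["title", "description"])?;
    let _names = index.searchable_fields(wtxn)?;     // Some(vec!["title", "description"])
    let _ids = index.searchable_fields_ids(wtxn)?;   // the same fields resolved to FieldIds
    Ok(())
}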
@@ -225,9 +253,26 @@ impl Index {
         self.main.delete::<_, Str>(wtxn, FACETED_FIELDS_KEY)
     }
 
-    /// Returns the facet fields ids associated with their facet type.
-    pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result<HashMap<FieldId, FacetType>> {
-        Ok(self.main.get::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY)?.unwrap_or_default())
+    /// Returns the facet fields names associated with their facet type.
+    pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, FacetType>> {
+        Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FACETED_FIELDS_KEY)?.unwrap_or_default())
     }
+
+    /// Same as `faceted_fields`, but returns ids instead.
+    pub fn faceted_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<HashMap<FieldId, FacetType>> {
+        let faceted_fields = self.faceted_fields(rtxn)?;
+        let fields_ids_map = self.fields_ids_map(rtxn)?;
+        let faceted_fields = faceted_fields
+            .iter()
+            .map(|(k, v)| {
+                let kid = fields_ids_map
+                    .id(k)
+                    .ok_or_else(|| format!("{:?} should be present in the field id map", k))
+                    .expect("corrupted data: ");
+                (kid, *v)
+            })
+            .collect();
+        Ok(faceted_fields)
+    }
 
     /* faceted documents ids */
 
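The displayed_fields_ids, searchable_fields_ids and faceted_fields_ids accessors added above all follow one pattern: names are the stored source of truth, and ids are derived on every read, so renumbering the fields ids map during reindexing can no longer desynchronize the settings. A standalone sketch, with a plain HashMap standing in for the crate's FieldsIdsMap:

use std::collections::HashMap;

fn main() {
    // id -> name, as maintained by the fields-ids map.
    let fields = ["id", "title", "age"];
    let ids_by_name: HashMap<&str, u8> =
        fields.iter().enumerate().map(|(i, n)| (*n, i as u8)).collect();

    // What the settings now persist: names, not ids.
    let stored_displayed = ["age", "title"];

    // What the *_ids accessors reconstruct on read.
    let displayed_ids: Vec<u8> = stored_displayed.iter().map(|n| ids_by_name[n]).collect();
    assert_eq!(displayed_ids, vec![2, 1]);
}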
@@ -148,7 +148,7 @@ impl FacetCondition {
     ) -> anyhow::Result<FacetCondition>
     {
         let fields_ids_map = index.fields_ids_map(rtxn)?;
-        let faceted_fields = index.faceted_fields(rtxn)?;
+        let faceted_fields = index.faceted_fields_ids(rtxn)?;
         let lexed = FilterParser::parse(Rule::prgm, expression)?;
         FacetCondition::from_pairs(&fields_ids_map, &faceted_fields, lexed)
     }
@@ -552,15 +552,15 @@ mod tests {
         // Test that the facet condition is correctly generated.
         let rtxn = index.read_txn().unwrap();
         let condition = FacetCondition::from_str(&rtxn, &index, "channel = ponce").unwrap();
-        let expected = OperatorString(1, FacetStringOperator::equal("Ponce"));
+        let expected = OperatorString(0, FacetStringOperator::equal("Ponce"));
         assert_eq!(condition, expected);
 
         let condition = FacetCondition::from_str(&rtxn, &index, "channel != ponce").unwrap();
-        let expected = OperatorString(1, FacetStringOperator::not_equal("ponce"));
+        let expected = OperatorString(0, FacetStringOperator::not_equal("ponce"));
         assert_eq!(condition, expected);
 
         let condition = FacetCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap();
-        let expected = OperatorString(1, FacetStringOperator::not_equal("ponce"));
+        let expected = OperatorString(0, FacetStringOperator::not_equal("ponce"));
         assert_eq!(condition, expected);
     }
 
@@ -581,13 +581,13 @@ mod tests {
         // Test that the facet condition is correctly generated.
         let rtxn = index.read_txn().unwrap();
         let condition = FacetCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap();
-        let expected = OperatorI64(1, Between(22, 44));
+        let expected = OperatorI64(0, Between(22, 44));
         assert_eq!(condition, expected);
 
         let condition = FacetCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap();
         let expected = Or(
-            Box::new(OperatorI64(1, LowerThan(22))),
-            Box::new(OperatorI64(1, GreaterThan(44))),
+            Box::new(OperatorI64(0, LowerThan(22))),
+            Box::new(OperatorI64(0, GreaterThan(44))),
         );
         assert_eq!(condition, expected);
     }
@@ -285,9 +285,13 @@ impl<'a> Search<'a> {
                 }
             }).next();
         match result {
-            Some((fid, is_ascending)) => {
-                let faceted_fields = self.index.faceted_fields(self.rtxn)?;
-                let ftype = *faceted_fields.get(&fid).context("unknown field id")?;
+            Some((attr_name, is_ascending)) => {
+                let field_id_map = self.index.fields_ids_map(self.rtxn)?;
+                let fid = field_id_map.id(&attr_name).with_context(|| format!("unknown field: {:?}", attr_name))?;
+                let faceted_fields = self.index.faceted_fields_ids(self.rtxn)?;
+                let ftype = *faceted_fields.get(&fid)
+                    .with_context(|| format!("{:?} not found in the faceted fields.", attr_name))
+                    .expect("corrupted data: ");
                 Some((fid, ftype, is_ascending))
             },
             None => None,
@@ -342,7 +342,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyhow::Result<()> {
         if heap.len() > limit { heap.pop(); }
     }
 
-    let faceted_fields = index.faceted_fields(rtxn)?;
+    let faceted_fields = index.faceted_fields_ids(rtxn)?;
     let fields_ids_map = index.fields_ids_map(rtxn)?;
     for (field_id, field_type) in faceted_fields {
         let facet_name = fields_ids_map.name(field_id).unwrap();
@@ -413,7 +413,7 @@ fn words_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, words: Vec<String>) -> anyhow::Result<()> {
 
 fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_name: String) -> anyhow::Result<()> {
     let fields_ids_map = index.fields_ids_map(&rtxn)?;
-    let faceted_fields = index.faceted_fields(&rtxn)?;
+    let faceted_fields = index.faceted_fields_ids(&rtxn)?;
 
     let field_id = fields_ids_map.id(&field_name)
         .with_context(|| format!("field {} not found", field_name))?;
@@ -451,7 +451,7 @@ fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_name: String) -> anyhow::Result<()> {
 
 fn facet_stats(index: &Index, rtxn: &heed::RoTxn, field_name: String) -> anyhow::Result<()> {
     let fields_ids_map = index.fields_ids_map(&rtxn)?;
-    let faceted_fields = index.faceted_fields(&rtxn)?;
+    let faceted_fields = index.faceted_fields_ids(&rtxn)?;
 
     let field_id = fields_ids_map.id(&field_name)
         .with_context(|| format!("field {} not found", field_name))?;
@@ -1,4 +1,3 @@
-use std::borrow::Cow;
 use std::io::{self, BufRead, Write};
 use std::iter::once;
 use std::path::PathBuf;
@@ -47,9 +46,9 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
     let index = Index::new(options, &opt.database)?;
     let rtxn = index.read_txn()?;
     let fields_ids_map = index.fields_ids_map(&rtxn)?;
-    let displayed_fields = match index.displayed_fields(&rtxn)? {
-        Some(fields) => Cow::Borrowed(fields),
-        None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()),
+    let displayed_fields = match index.displayed_fields_ids(&rtxn)? {
+        Some(fields) => fields,
+        None => fields_ids_map.iter().map(|(id, _)| id).collect(),
     };
 
     let stdin = io::stdin();
@@ -25,7 +25,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
 
         // We retrieve the number of documents ids that we are deleting.
         let number_of_documents = self.index.number_of_documents(self.wtxn)?;
-        let faceted_fields = self.index.faceted_fields(self.wtxn)?;
+        let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
 
         // We clean some of the main engine datastructures.
         self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
@@ -188,7 +188,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
         drop(iter);
 
         // Remove the documents ids from the faceted documents ids.
-        let faceted_fields = self.index.faceted_fields(self.wtxn)?;
+        let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
         for (field_id, facet_type) in faceted_fields {
             let mut docids = self.index.faceted_documents_ids(self.wtxn, field_id)?;
             docids.difference_with(&self.documents_ids);
@@ -51,7 +51,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
 
     pub fn execute(self) -> anyhow::Result<()> {
         // We get the faceted fields to be able to create the facet levels.
-        let faceted_fields = self.index.faceted_fields(self.wtxn)?;
+        let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
 
         debug!("Computing and writing the facet values levels docids into LMDB on disk...");
         for (field_id, facet_type) in faceted_fields {
@@ -338,8 +338,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
             FacetLevel0ValuesDocids,
         }
 
-        let faceted_fields = self.index.faceted_fields(self.wtxn)?;
-        let searchable_fields: HashSet<_> = match self.index.searchable_fields(self.wtxn)? {
+        let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
+        let searchable_fields: HashSet<_> = match self.index.searchable_fields_ids(self.wtxn)? {
             Some(fields) => fields.iter().copied().collect(),
             None => fields_ids_map.iter().map(|(id, _name)| id).collect(),
         };
@@ -485,7 +485,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
         self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
 
         // We write the primary key field id into the main database
-        self.index.put_primary_key(self.wtxn, primary_key)?;
+        self.index.put_primary_key(self.wtxn, &primary_key)?;
 
         // We write the external documents ids into the main database.
         self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
@@ -10,13 +10,15 @@ use log::info;
 use roaring::RoaringBitmap;
 use serde_json::{Map, Value};
 
-use crate::{BEU32, MergeFn, Index, FieldId, FieldsIdsMap, ExternalDocumentsIds};
+use crate::{Index, BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId};
 use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
 use super::merge_function::merge_two_obkvs;
 use super::{create_writer, create_sorter, IndexDocumentsMethod};
 
+const DEFAULT_PRIMARY_KEY_NAME: &str = "id";
+
 pub struct TransformOutput {
-    pub primary_key: FieldId,
+    pub primary_key: String,
     pub fields_ids_map: FieldsIdsMap,
     pub external_documents_ids: ExternalDocumentsIds<'static>,
     pub new_documents_ids: RoaringBitmap,
@@ -73,7 +75,6 @@ impl Transform<'_, '_> {
     {
         let mut fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
         let external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap();
-        let primary_key = self.index.primary_key(self.rtxn)?;
 
         // Deserialize the whole batch of documents in memory.
         let mut documents: Peekable<Box<dyn Iterator<Item=serde_json::Result<Map<String, Value>>>>> = if is_stream {
@@ -88,27 +89,15 @@ impl Transform<'_, '_> {
         };
 
         // We extract the primary key from the first document in
-        // the batch if it hasn't already been defined in the index.
-        let primary_key = match primary_key {
-            Some(primary_key) => primary_key,
-            None => {
-                // We ignore a potential error here as we can't early return it now,
-                // the peek method gives us only a reference on the next item,
-                // we will eventually return it in the iteration just after.
-                let first = documents.peek().and_then(|r| r.as_ref().ok());
-                match first.and_then(|doc| doc.keys().find(|k| k.contains("id"))) {
-                    Some(key) => fields_ids_map.insert(&key).context("field id limit reached")?,
-                    None => {
-                        if !self.autogenerate_docids {
-                            // If there is no primary key in the current document batch, we must
-                            // return an error and not automatically generate any document id.
-                            return Err(anyhow!("missing primary key"))
-                        }
-                        fields_ids_map.insert("id").context("field id limit reached")?
-                    },
-                }
-            },
-        };
+        // the batch if it hasn't already been defined in the index
+        let first = documents.peek().and_then(|r| r.as_ref().ok());
+        let alternative_name = first.and_then(|doc| doc.keys().find(|k| k.contains(DEFAULT_PRIMARY_KEY_NAME)).cloned());
+        let (primary_key_id, primary_key) = compute_primary_key_pair(
+            self.index.primary_key(self.rtxn)?,
+            &mut fields_ids_map,
+            alternative_name,
+            self.autogenerate_docids
+        )?;
 
         if documents.peek().is_none() {
             return Ok(TransformOutput {
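A quick standalone illustration of the fallback encoded above: when the index has no primary key yet, the first document's keys are scanned for one containing DEFAULT_PRIMARY_KEY_NAME ("id"), and that key becomes the alternative name handed to compute_primary_key_pair. The document keys here are invented:

fn main() {
    let doc_keys = ["title", "product_id", "price"];
    // Mirrors `doc.keys().find(|k| k.contains(DEFAULT_PRIMARY_KEY_NAME))`.
    let alternative = doc_keys.iter().find(|k| k.contains("id")).copied();
    assert_eq!(alternative, Some("product_id"));
}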
@@ -122,13 +111,6 @@ impl Transform<'_, '_> {
             });
         }
 
-        // Get the primary key field name now, this way we will
-        // be able to get the value in the JSON Map document.
-        let primary_key_name = fields_ids_map
-            .name(primary_key)
-            .expect("found the primary key name")
-            .to_owned();
-
         // We must choose the appropriate merge function for when two or more documents
         // with the same user id must be merged or fully replaced in the same batch.
         let merge_function = match self.index_documents_method {
@@ -170,7 +152,7 @@ impl Transform<'_, '_> {
 
             // We retrieve the user id from the document based on the primary key name,
             // if the document id isn't present we generate a uuid.
-            let external_id = match document.get(&primary_key_name) {
+            let external_id = match document.get(&primary_key) {
                 Some(value) => match value {
                     Value::String(string) => Cow::Borrowed(string.as_str()),
                     Value::Number(number) => Cow::Owned(number.to_string()),
@@ -196,7 +178,7 @@ impl Transform<'_, '_> {
                     serde_json::to_writer(&mut json_buffer, value)?;
                     writer.insert(field_id, &json_buffer)?;
                 }
-                else if field_id == primary_key {
+                else if field_id == primary_key_id {
                     // We validate the document id [a-zA-Z0-9\-_].
                     let external_id = match validate_document_id(&external_id) {
                         Some(valid) => valid,
@@ -240,42 +222,37 @@ impl Transform<'_, '_> {
 
         let mut csv = csv::Reader::from_reader(reader);
         let headers = csv.headers()?;
-        let primary_key = self.index.primary_key(self.rtxn)?;
 
-        // Generate the new fields ids based on the current fields ids and this CSV headers.
         let mut fields_ids = Vec::new();
+        // Generate the new fields ids based on the current fields ids and this CSV headers.
         for (i, header) in headers.iter().enumerate() {
            let id = fields_ids_map.insert(header).context("field id limit reached)")?;
            fields_ids.push((id, i));
         }
 
         // Extract the position of the primary key in the current headers, None if not found.
-        let external_id_pos = match primary_key {
+        let primary_key_pos = match self.index.primary_key(self.rtxn)? {
             Some(primary_key) => {
-                // Te primary key have is known so we must find the position in the CSV headers.
-                let name = fields_ids_map.name(primary_key).expect("found the primary key name");
-                headers.iter().position(|h| h == name)
+                // The primary key is known so we must find the position in the CSV headers.
+                headers.iter().position(|h| h == primary_key)
             },
             None => headers.iter().position(|h| h.contains("id")),
         };
 
-        // Returns the field id in the fileds ids map, create an "id" field
+        // Returns the field id in the fields ids map, create an "id" field
         // in case it is not in the current headers.
-        let primary_key_field_id = match external_id_pos {
-            Some(pos) => fields_ids_map.id(&headers[pos]).expect("found the primary key"),
-            None => {
-                if !self.autogenerate_docids {
-                    // If there is no primary key in the current document batch, we must
-                    // return an error and not automatically generate any document id.
-                    return Err(anyhow!("missing primary key"))
-                }
-                let field_id = fields_ids_map.insert("id").context("field id limit reached")?;
-                // We make sure to add the primary key field id to the fields ids,
-                // this way it is added to the obks.
-                fields_ids.push((field_id, usize::max_value()));
-                field_id
-            },
-        };
+        let alternative_name = primary_key_pos.map(|pos| headers[pos].to_string());
+        let (primary_key_id, _) = compute_primary_key_pair(
+            self.index.primary_key(self.rtxn)?,
+            &mut fields_ids_map,
+            alternative_name,
+            self.autogenerate_docids
+        )?;
+
+        // The primary key field is not present in the header, so we need to create it.
+        if primary_key_pos.is_none() {
+            fields_ids.push((primary_key_id, usize::max_value()));
+        }
 
         // We sort the fields ids by the fields ids map id, this way we are sure to iterate over
         // the records fields in the fields ids map order and correctly generate the obkv.
@@ -310,7 +287,7 @@ impl Transform<'_, '_> {
             }
 
             // We extract the user id if we know where it is or generate an UUID V4 otherwise.
-            let external_id = match external_id_pos {
+            let external_id = match primary_key_pos {
                 Some(pos) => {
                     let external_id = &record[pos];
                     // We validate the document id [a-zA-Z0-9\-_].
@@ -326,7 +303,7 @@ impl Transform<'_, '_> {
             // we return the generated document id instead of the record field.
             let iter = fields_ids.iter()
                 .map(|(fi, i)| {
-                    let field = if *fi == primary_key_field_id { external_id } else { &record[*i] };
+                    let field = if *fi == primary_key_id { external_id } else { &record[*i] };
                     (fi, field)
                 });
 
@@ -349,9 +326,13 @@ impl Transform<'_, '_> {
 
         // Now that we have a valid sorter that contains the user id and the obkv we
         // give it to the last transforming function which returns the TransformOutput.
+        let primary_key_name = fields_ids_map
+            .name(primary_key_id)
+            .map(String::from)
+            .expect("Primary key must be present in fields id map");
         self.output_from_sorter(
             sorter,
-            primary_key_field_id,
+            primary_key_name,
             fields_ids_map,
             documents_count,
             external_documents_ids,
@@ -365,7 +346,7 @@ impl Transform<'_, '_> {
     fn output_from_sorter<F>(
         self,
         sorter: grenad::Sorter<MergeFn>,
-        primary_key: FieldId,
+        primary_key: String,
         fields_ids_map: FieldsIdsMap,
         approximate_number_of_documents: usize,
         mut external_documents_ids: ExternalDocumentsIds<'_>,
@@ -477,11 +458,11 @@ impl Transform<'_, '_> {
     // TODO this can be done in parallel by using the rayon `ThreadPool`.
     pub fn remap_index_documents(
         self,
-        primary_key: FieldId,
-        fields_ids_map: FieldsIdsMap,
+        primary_key: String,
+        old_fields_ids_map: FieldsIdsMap,
+        new_fields_ids_map: FieldsIdsMap,
     ) -> anyhow::Result<TransformOutput>
     {
-        let current_fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
         let external_documents_ids = self.index.external_documents_ids(self.rtxn)?;
         let documents_ids = self.index.documents_ids(self.rtxn)?;
         let documents_count = documents_ids.len() as usize;
@@ -499,8 +480,8 @@ impl Transform<'_, '_> {
             let mut obkv_writer = obkv::KvWriter::new(&mut obkv_buffer);
 
             // We iterate over the new `FieldsIdsMap` ids in order and construct the new obkv.
-            for (id, name) in fields_ids_map.iter() {
-                if let Some(val) = current_fields_ids_map.id(name).and_then(|id| obkv.get(id)) {
+            for (id, name) in new_fields_ids_map.iter() {
+                if let Some(val) = old_fields_ids_map.id(name).and_then(|id| obkv.get(id)) {
                     obkv_writer.insert(id, val)?;
                 }
             }
@@ -516,7 +497,7 @@ impl Transform<'_, '_> {
 
         Ok(TransformOutput {
             primary_key,
-            fields_ids_map,
+            fields_ids_map: new_fields_ids_map,
             external_documents_ids: external_documents_ids.into_static(),
             new_documents_ids: documents_ids,
             replaced_documents_ids: RoaringBitmap::default(),
@@ -526,6 +507,42 @@ impl Transform<'_, '_> {
     }
 }
 
+/// Given an optional primary key and an optional alternative name, returns the (field_id, attr_name)
+/// for the primary key according to the following rules:
+/// - if primary_key is `Some`, returns the id and the name, else
+/// - if alternative_name is Some, adds alternative to the fields_ids_map, and returns the pair, else
+/// - if autogenerate_docids is true, insert the default id value in the field ids map ("id") and
+/// returns the pair, else
+/// - returns an error.
+fn compute_primary_key_pair(
+    primary_key: Option<&str>,
+    fields_ids_map: &mut FieldsIdsMap,
+    alternative_name: Option<String>,
+    autogenerate_docids: bool,
+) -> anyhow::Result<(FieldId, String)> {
+    match primary_key {
+        Some(primary_key) => {
+            let id = fields_ids_map.id(primary_key).expect("primary key must be present in the fields id map");
+            Ok((id, primary_key.to_string()))
+        }
+        None => {
+            let name = match alternative_name {
+                Some(key) => key,
+                None => {
+                    if !autogenerate_docids {
+                        // If there is no primary key in the current document batch, we must
+                        // return an error and not automatically generate any document id.
+                        anyhow::bail!("missing primary key")
+                    }
+                    DEFAULT_PRIMARY_KEY_NAME.to_string()
+                },
+            };
+            let id = fields_ids_map.insert(&name).context("field id limit reached")?;
+            Ok((id, name))
+        },
+    }
+}
+
 /// Only the last value associated with an id is kept.
 fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
     obkvs.last().context("no last value").map(|last| last.clone().into_owned())
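The four rules in the doc comment above, restated as a standalone sketch exercised much like the new tests in the next hunk. SimpleFieldsMap is a minimal stand-in for the crate's FieldsIdsMap, and unlike the real function, which panics when an explicit primary key is missing from the map, this sketch returns an error:

#[derive(Default)]
struct SimpleFieldsMap(Vec<String>);

impl SimpleFieldsMap {
    fn id(&self, name: &str) -> Option<u8> {
        self.0.iter().position(|n| n == name).map(|p| p as u8)
    }
    fn insert(&mut self, name: &str) -> Option<u8> {
        match self.id(name) {
            Some(id) => Some(id),
            None => { self.0.push(name.to_string()); Some((self.0.len() - 1) as u8) }
        }
    }
}

fn primary_key_pair(
    primary_key: Option<&str>,
    fields: &mut SimpleFieldsMap,
    alternative_name: Option<String>,
    autogenerate_docids: bool,
) -> Result<(u8, String), String> {
    match primary_key {
        // Rule 1: an explicit primary key wins and must already be in the map.
        Some(pk) => fields.id(pk).map(|id| (id, pk.to_string())).ok_or_else(|| "primary key not in map".to_string()),
        None => {
            let name = match alternative_name {
                // Rule 2: fall back to the name found in the first document.
                Some(name) => name,
                // Rule 3: fall back to "id" when ids may be autogenerated.
                None if autogenerate_docids => "id".to_string(),
                // Rule 4: nothing to fall back on, report a missing primary key.
                None => return Err("missing primary key".to_string()),
            };
            let id = fields.insert(&name).ok_or("field id limit reached")?;
            Ok((id, name))
        }
    }
}

fn main() {
    let mut fields = SimpleFieldsMap::default();
    assert_eq!(primary_key_pair(None, &mut fields, Some("tata".to_string()), false), Ok((0, "tata".to_string())));
    assert!(primary_key_pair(None, &mut SimpleFieldsMap::default(), None, false).is_err());
}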
@@ -552,3 +569,73 @@ fn validate_document_id(document_id: &str) -> Option<&str> {
         })
     })
 }
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    mod compute_primary_key {
+        use super::compute_primary_key_pair;
+        use super::FieldsIdsMap;
+
+        #[test]
+        #[should_panic]
+        fn should_panic_primary_key_not_in_map() {
+            let mut fields_map = FieldsIdsMap::new();
+            let _result = compute_primary_key_pair(
+                Some("toto"),
+                &mut fields_map,
+                None,
+                false);
+        }
+
+        #[test]
+        fn should_return_primary_key_if_is_some() {
+            let mut fields_map = FieldsIdsMap::new();
+            fields_map.insert("toto").unwrap();
+            let result = compute_primary_key_pair(
+                Some("toto"),
+                &mut fields_map,
+                Some("tata".to_string()),
+                false);
+            assert_eq!(result.unwrap(), (0u8, "toto".to_string()));
+            assert_eq!(fields_map.len(), 1);
+        }
+
+        #[test]
+        fn should_return_alternative_if_primary_is_none() {
+            let mut fields_map = FieldsIdsMap::new();
+            let result = compute_primary_key_pair(
+                None,
+                &mut fields_map,
+                Some("tata".to_string()),
+                false);
+            assert_eq!(result.unwrap(), (0u8, "tata".to_string()));
+            assert_eq!(fields_map.len(), 1);
+        }
+
+        #[test]
+        fn should_return_default_if_both_are_none() {
+            let mut fields_map = FieldsIdsMap::new();
+            let result = compute_primary_key_pair(
+                None,
+                &mut fields_map,
+                None,
+                true);
+            assert_eq!(result.unwrap(), (0u8, "id".to_string()));
+            assert_eq!(fields_map.len(), 1);
+        }
+
+        #[test]
+        fn should_return_err_if_both_are_none_and_recompute_is_false(){
+            let mut fields_map = FieldsIdsMap::new();
+            let result = compute_primary_key_pair(
+                None,
+                &mut fields_map,
+                None,
+                false);
+            assert!(result.is_err());
+            assert_eq!(fields_map.len(), 0);
+        }
+    }
+}
@@ -1,14 +1,16 @@
 use std::collections::HashMap;
 use std::str::FromStr;
 
-use anyhow::{ensure, Context};
+use anyhow::Context;
 use grenad::CompressionType;
+use itertools::Itertools;
 use rayon::ThreadPool;
 
+use crate::criterion::Criterion;
+use crate::facet::FacetType;
 use crate::update::index_documents::{Transform, IndexDocumentsMethod};
 use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
-use crate::facet::FacetType;
-use crate::{Index, FieldsIdsMap, Criterion};
+use crate::{Index, FieldsIdsMap};
 
 pub struct Settings<'a, 't, 'u, 'i> {
     wtxn: &'t mut heed::RwTxn<'i, 'u>,
@@ -26,7 +28,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
     // however if it is `Some(None)` it means that the user forced a reset of the setting.
     searchable_fields: Option<Option<Vec<String>>>,
     displayed_fields: Option<Option<Vec<String>>>,
-    faceted_fields: Option<HashMap<String, String>>,
+    faceted_fields: Option<Option<HashMap<String, String>>>,
     criteria: Option<Option<Vec<String>>>,
 }
 
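The comment in this struct describes a tri-state encoding that the faceted_fields field now also uses: None leaves a setting untouched, Some(None) resets it, and Some(Some(v)) sets it. A minimal standalone sketch of applying such a field:

fn apply_setting<T>(update: Option<Option<T>>, current: &mut Option<T>) {
    match update {
        Some(Some(value)) => *current = Some(value), // the user set a value
        Some(None) => *current = None,               // the user forced a reset
        None => {}                                   // the setting was not touched
    }
}

fn main() {
    let mut faceted: Option<Vec<String>> = Some(vec!["age".to_string()]);
    apply_setting(Some(None), &mut faceted); // a reset, as reset_faceted_fields below now allows
    assert_eq!(faceted, None);
}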
@@ -67,7 +69,11 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
     }
 
     pub fn set_faceted_fields(&mut self, names_facet_types: HashMap<String, String>) {
-        self.faceted_fields = Some(names_facet_types);
+        self.faceted_fields = Some(Some(names_facet_types));
+    }
+
+    pub fn reset_faceted_fields(&mut self) {
+        self.faceted_fields = Some(None);
     }
 
     pub fn reset_criteria(&mut self) {
@@ -78,183 +84,188 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         self.criteria = Some(Some(criteria));
     }
 
-    pub fn execute<F>(self, progress_callback: F) -> anyhow::Result<()>
+    fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()>
     where
-        F: Fn(UpdateIndexingStep) + Sync
+        F: Fn(UpdateIndexingStep) + Sync,
     {
-        let mut updated_searchable_fields = None;
-        let mut updated_faceted_fields = None;
-        let mut updated_displayed_fields = None;
-        let mut updated_criteria = None;
-
-        // Construct the new FieldsIdsMap based on the searchable fields order.
         let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
-        let mut fields_ids_map = match self.searchable_fields {
-            Some(Some(searchable_fields)) => {
-                let mut new_fields_ids_map = FieldsIdsMap::new();
-                let mut new_searchable_fields = Vec::new();
+
+        // if the settings are set before any document update, we don't need to do anything, and
+        // will set the primary key during the first document addition.
+        if self.index.number_of_documents(&self.wtxn)? == 0 {
+            return Ok(())
+        }
 
-                for name in searchable_fields {
-                    let id = new_fields_ids_map.insert(&name).context("field id limit reached")?;
-                    new_searchable_fields.push(id);
-                }
-
-                for (_, name) in fields_ids_map.iter() {
-                    new_fields_ids_map.insert(name).context("field id limit reached")?;
-                }
-
-                updated_searchable_fields = Some(Some(new_searchable_fields));
-                new_fields_ids_map
-            },
-            Some(None) => {
-                updated_searchable_fields = Some(None);
-                fields_ids_map
-            },
-            None => fields_ids_map,
+        let transform = Transform {
+            rtxn: &self.wtxn,
+            index: self.index,
+            log_every_n: self.log_every_n,
+            chunk_compression_type: self.chunk_compression_type,
+            chunk_compression_level: self.chunk_compression_level,
+            chunk_fusing_shrink_size: self.chunk_fusing_shrink_size,
+            max_nb_chunks: self.max_nb_chunks,
+            max_memory: self.max_memory,
+            index_documents_method: IndexDocumentsMethod::ReplaceDocuments,
+            autogenerate_docids: false,
         };
 
-        // We compute or generate the new primary key field id.
-        // TODO make the primary key settable.
-        let primary_key = match self.index.primary_key(&self.wtxn)? {
-            Some(id) => {
-                let current_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
-                let name = current_fields_ids_map.name(id).unwrap();
-                fields_ids_map.insert(name).context("field id limit reached")?
-            },
-            None => fields_ids_map.insert("id").context("field id limit reached")?,
-        };
+        // There already has been a document addition, the primary key should be set by now.
+        let primary_key = self.index.primary_key(&self.wtxn)?.context("Index must have a primary key")?;
 
-        let current_faceted_fields = self.index.faceted_fields(self.wtxn)?;
-        if let Some(fields_names_facet_types) = self.faceted_fields {
-            let mut faceted_fields = HashMap::new();
-            for (name, sftype) in fields_names_facet_types {
-                let ftype = FacetType::from_str(&sftype).with_context(|| format!("parsing facet type {:?}", sftype))?;
-                let id = fields_ids_map.insert(&name).context("field id limit reached")?;
-                match current_faceted_fields.get(&id) {
-                    Some(pftype) => {
-                        ensure!(ftype == *pftype, "{} facet type changed from {} to {}", name, ftype, pftype);
-                        faceted_fields.insert(id, ftype)
-                    },
-                    None => faceted_fields.insert(id, ftype),
-                };
-            }
+        // We remap the documents fields based on the new `FieldsIdsMap`.
+        let output = transform.remap_index_documents(
+            primary_key.to_string(),
+            old_fields_ids_map,
+            fields_ids_map.clone())?;
 
-            updated_faceted_fields = Some(faceted_fields);
-        }
-
-        // Check that the displayed attributes have been specified.
-        if let Some(value) = self.displayed_fields {
-            match value {
-                Some(names) => {
-                    let mut new_displayed_fields = Vec::new();
-                    for name in names {
-                        let id = fields_ids_map.insert(&name).context("field id limit reached")?;
-                        new_displayed_fields.push(id);
-                    }
-                    updated_displayed_fields = Some(Some(new_displayed_fields));
-                }
-                None => updated_displayed_fields = Some(None),
-            }
-        }
-
-        if let Some(criteria) = self.criteria {
-            match criteria {
-                Some(criteria_names) => {
-                    let mut new_criteria = Vec::new();
-                    for name in criteria_names {
-                        let criterion = Criterion::from_str(&mut fields_ids_map, &name)?;
-                        if let Some(fid) = criterion.field_id() {
-                            let name = fields_ids_map.name(fid).unwrap();
-                            let faceted_fields = updated_faceted_fields.as_ref().unwrap_or(&current_faceted_fields);
-                            ensure!(faceted_fields.contains_key(&fid), "criterion field {} must be faceted", name);
-                        }
-                        new_criteria.push(criterion);
-                    }
-                    updated_criteria = Some(Some(new_criteria));
-                },
-                None => updated_criteria = Some(None),
-            }
-        }
-
-        // If any setting have modified any of the datastructures it means that we need
-        // to retrieve the documents and then reindex then with the new settings.
-        if updated_searchable_fields.is_some() || updated_faceted_fields.is_some() {
-            let transform = Transform {
-                rtxn: &self.wtxn,
-                index: self.index,
-                log_every_n: self.log_every_n,
-                chunk_compression_type: self.chunk_compression_type,
-                chunk_compression_level: self.chunk_compression_level,
-                chunk_fusing_shrink_size: self.chunk_fusing_shrink_size,
-                max_nb_chunks: self.max_nb_chunks,
-                max_memory: self.max_memory,
-                index_documents_method: IndexDocumentsMethod::ReplaceDocuments,
-                autogenerate_docids: false,
-            };
-
-            // We remap the documents fields based on the new `FieldsIdsMap`.
-            let output = transform.remap_index_documents(primary_key, fields_ids_map.clone())?;
-
-            // We write the new FieldsIdsMap to the database
-            // this way next indexing methods will be based on that.
-            self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
-
-            if let Some(faceted_fields) = updated_faceted_fields {
-                // We write the faceted_fields fields into the database here.
-                self.index.put_faceted_fields(self.wtxn, &faceted_fields)?;
-            }
-
-            if let Some(searchable_fields) = updated_searchable_fields {
-                // The new searchable fields are also written down to make sure
-                // that the IndexDocuments system takes only these ones into account.
-                match searchable_fields {
-                    Some(fields) => self.index.put_searchable_fields(self.wtxn, &fields)?,
-                    None => self.index.delete_searchable_fields(self.wtxn).map(drop)?,
-                }
-            }
-
-            // We clear the full database (words-fst, documents ids and documents content).
-            ClearDocuments::new(self.wtxn, self.index).execute()?;
-
-            // We index the generated `TransformOutput` which must contain
-            // all the documents with fields in the newly defined searchable order.
-            let mut indexing_builder = IndexDocuments::new(self.wtxn, self.index);
-            indexing_builder.log_every_n = self.log_every_n;
-            indexing_builder.max_nb_chunks = self.max_nb_chunks;
-            indexing_builder.max_memory = self.max_memory;
-            indexing_builder.linked_hash_map_size = self.linked_hash_map_size;
-            indexing_builder.chunk_compression_type = self.chunk_compression_type;
-            indexing_builder.chunk_compression_level = self.chunk_compression_level;
-            indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
-            indexing_builder.thread_pool = self.thread_pool;
-            indexing_builder.execute_raw(output, &progress_callback)?;
-        }
-
-        if let Some(displayed_fields) = updated_displayed_fields {
-            match displayed_fields {
-                Some(fields) => self.index.put_displayed_fields(self.wtxn, &fields)?,
-                None => self.index.delete_displayed_fields(self.wtxn).map(drop)?,
-            }
-        }
-
-        if let Some(criteria) = updated_criteria {
-            match criteria {
-                Some(criteria) => self.index.put_criteria(self.wtxn, &criteria)?,
-                None => self.index.delete_criteria(self.wtxn).map(drop)?,
-            }
-        }
+        // We clear the full database (words-fst, documents ids and documents content).
+        ClearDocuments::new(self.wtxn, self.index).execute()?;
+
+        // We index the generated `TransformOutput` which must contain
+        // all the documents with fields in the newly defined searchable order.
+        let mut indexing_builder = IndexDocuments::new(self.wtxn, self.index);
+        indexing_builder.log_every_n = self.log_every_n;
+        indexing_builder.max_nb_chunks = self.max_nb_chunks;
+        indexing_builder.max_memory = self.max_memory;
+        indexing_builder.linked_hash_map_size = self.linked_hash_map_size;
+        indexing_builder.chunk_compression_type = self.chunk_compression_type;
+        indexing_builder.chunk_compression_level = self.chunk_compression_level;
+        indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
+        indexing_builder.thread_pool = self.thread_pool;
+        indexing_builder.execute_raw(output, &cb)?;
         Ok(())
     }
 
+    fn update_displayed(&mut self) -> anyhow::Result<bool> {
+        match self.displayed_fields {
+            Some(Some(ref fields)) => {
+                let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
+                // fields are deduplicated, only the first occurrence is taken into account
+                let names: Vec<_> = fields
+                    .iter()
+                    .unique()
+                    .map(String::as_str)
+                    .collect();
+
+                for name in names.iter() {
+                    fields_ids_map
+                        .insert(name)
+                        .context("field id limit exceeded")?;
+                }
+                self.index.put_displayed_fields(self.wtxn, &names)?;
+                self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
+            }
+            Some(None) => { self.index.delete_displayed_fields(self.wtxn)?; },
+            None => return Ok(false),
+        }
+        Ok(true)
+    }
+
+    /// Udpates the index's searchable attributes. This causes the field map to be recomputed to
+    /// reflect the order of the searchable attributes.
+    fn update_searchable(&mut self) -> anyhow::Result<bool> {
+        match self.searchable_fields {
+            Some(Some(ref fields)) => {
+                // every time the searchable attributes are updated, we need to update the
+                // ids for any settings that uses the facets. (displayed_fields,
+                // faceted_fields)
+                let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
+
+                let mut new_fields_ids_map = FieldsIdsMap::new();
+                // fields are deduplicated, only the first occurrence is taken into account
+                let names = fields
+                    .iter()
+                    .unique()
+                    .map(String::as_str)
+                    .collect::<Vec<_>>();
+
+                // Add all the searchable attributes to the field map, and then add the
+                // remaining fields from the old field map to the new one
+                for name in names.iter() {
+                    new_fields_ids_map
+                        .insert(&name)
+                        .context("field id limit exceeded")?;
+                }
+
+                for (_, name) in old_fields_ids_map.iter() {
+                    new_fields_ids_map
+                        .insert(&name)
+                        .context("field id limit exceeded")?;
+                }
+
+                self.index.put_searchable_fields(self.wtxn, &names)?;
+                self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?;
+            }
+            Some(None) => { self.index.delete_searchable_fields(self.wtxn)?; },
+            None => return Ok(false),
+        }
+        Ok(true)
+    }
+
+    fn update_facets(&mut self) -> anyhow::Result<bool> {
+        match self.faceted_fields {
+            Some(Some(ref fields)) => {
+                let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
+                let mut new_facets = HashMap::new();
+                for (name, ty) in fields {
+                    fields_ids_map.insert(name).context("field id limit exceeded")?;
+                    let ty = FacetType::from_str(&ty)?;
+                    new_facets.insert(name.clone(), ty);
+                }
+                self.index.put_faceted_fields(self.wtxn, &new_facets)?;
+                self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
+            }
+            Some(None) => { self.index.delete_faceted_fields(self.wtxn)?; },
+            None => return Ok(false)
+        }
+        Ok(true)
+    }
+
+    fn update_criteria(&mut self) -> anyhow::Result<()> {
+        match self.criteria {
+            Some(Some(ref fields)) => {
+                let faceted_fields = self.index.faceted_fields(&self.wtxn)?;
+                let mut new_criteria = Vec::new();
+                for name in fields {
+                    let criterion = Criterion::from_str(&faceted_fields, &name)?;
+                    new_criteria.push(criterion);
+                }
+                self.index.put_criteria(self.wtxn, &new_criteria)?;
+            }
+            Some(None) => { self.index.delete_criteria(self.wtxn)?; }
+            None => (),
+        }
+        Ok(())
+    }
+
+    pub fn execute<F>(mut self, progress_callback: F) -> anyhow::Result<()>
+        where
+            F: Fn(UpdateIndexingStep) + Sync
+    {
+        let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?;
+        self.update_displayed()?;
+        let facets_updated = self.update_facets()?;
+        // update_criteria MUST be called after update_facets, since criterion fields must be set
+        // as facets.
+        self.update_criteria()?;
+        let searchable_updated = self.update_searchable()?;
+
+        if facets_updated || searchable_updated {
+            self.reindex(&progress_callback, old_fields_ids_map)?;
+        }
+        Ok(())
+    }
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::update::{IndexDocuments, UpdateFormat};
 
     use heed::EnvOpenOptions;
     use maplit::hashmap;
 
+    use crate::facet::FacetType;
+    use crate::update::{IndexDocuments, UpdateFormat};
+
     #[test]
     fn set_and_reset_searchable_fields() {
         let path = tempfile::tempdir().unwrap();
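A hedged usage sketch of the new flow, mirroring the tests below (the field names are illustrative): update_facets runs before update_criteria inside execute, so a criterion like asc(price) validates against facets declared in the same call, and a full reindex is only triggered when the faceted or searchable attributes actually changed.

// assuming an open `index` and write transaction `wtxn`, as in the tests
let mut builder = Settings::new(&mut wtxn, &index);
builder.set_displayed_fields(vec!["title".to_string()]);
builder.set_faceted_fields(hashmap! { "price".to_string() => "integer".to_string() });
builder.set_criteria(vec!["asc(price)".to_string()]);
builder.execute(|_| ()).unwrap();
wtxn.commit().unwrap();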
@@ -336,10 +347,8 @@ mod tests {
 
         // Check that the displayed fields are correctly set to `None` (default value).
         let rtxn = index.read_txn().unwrap();
-        let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
         let fields_ids = index.displayed_fields(&rtxn).unwrap();
-        let age_id = fields_ids_map.id("age").unwrap();
-        assert_eq!(fields_ids, Some(&[age_id][..]));
+        assert_eq!(fields_ids.unwrap(), (&["age"][..]));
         drop(rtxn);
 
         // We change the searchable fields to be the "name" field only.
@@ -351,10 +360,8 @@ mod tests {
 
         // Check that the displayed fields always contains only the "age" field.
         let rtxn = index.read_txn().unwrap();
-        let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
         let fields_ids = index.displayed_fields(&rtxn).unwrap();
-        let age_id = fields_ids_map.id("age").unwrap();
-        assert_eq!(fields_ids, Some(&[age_id][..]));
+        assert_eq!(fields_ids.unwrap(), &["age"][..]);
         drop(rtxn);
     }
 
@@ -402,10 +409,8 @@ mod tests {
 
         // Check that the displayed fields are correctly set to only the "age" field.
         let rtxn = index.read_txn().unwrap();
-        let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
-        let age_field_id = fields_ids_map.id("age").unwrap();
         let fields_ids = index.displayed_fields(&rtxn).unwrap();
-        assert_eq!(fields_ids.unwrap(), &[age_field_id][..]);
+        assert_eq!(fields_ids.unwrap(), &["age"][..]);
         drop(rtxn);
 
         // We reset the fields ids to become `None`, the default value.
@@ -445,9 +450,9 @@ mod tests {
         // Check that the displayed fields are correctly set.
         let rtxn = index.read_txn().unwrap();
         let fields_ids = index.faceted_fields(&rtxn).unwrap();
-        assert_eq!(fields_ids, hashmap!{ 1 => FacetType::Integer });
+        assert_eq!(fields_ids, hashmap!{ "age".to_string() => FacetType::Integer });
         // Only count the field_id 0 and level 0 facet values.
-        let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[1, 0]).unwrap().count();
+        let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count();
         assert_eq!(count, 3);
         drop(rtxn);
 
@@ -461,8 +466,49 @@ mod tests {
 
         let rtxn = index.read_txn().unwrap();
         // Only count the field_id 0 and level 0 facet values.
-        let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[1, 0]).unwrap().count();
+        let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count();
         assert_eq!(count, 4);
         drop(rtxn);
     }
+
+    #[test]
+    fn setting_searchable_recomputes_other_settings() {
+        let path = tempfile::tempdir().unwrap();
+        let mut options = EnvOpenOptions::new();
+        options.map_size(10 * 1024 * 1024); // 10 MB
+        let index = Index::new(options, &path).unwrap();
+
+        // Set all the settings except searchable
+        let mut wtxn = index.write_txn().unwrap();
+        let mut builder = Settings::new(&mut wtxn, &index);
+        builder.set_displayed_fields(vec!["hello".to_string()]);
+        builder.set_faceted_fields(hashmap!{
+            "age".into() => "integer".into(),
+            "toto".into() => "integer".into(),
+        });
+        builder.set_criteria(vec!["asc(toto)".to_string()]);
+        builder.execute(|_| ()).unwrap();
+        wtxn.commit().unwrap();
+
+        // check the output
+        let rtxn = index.read_txn().unwrap();
+        assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap());
+        // since no documents have been pushed the primary key is still unset
+        assert!(index.primary_key(&rtxn).unwrap().is_none());
+        assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap());
+        drop(rtxn);
+
+        // We set toto and age as searchable to force reordering of the fields
+        let mut wtxn = index.write_txn().unwrap();
+        let mut builder = Settings::new(&mut wtxn, &index);
+        builder.set_searchable_fields(vec!["toto".to_string(), "age".to_string()]);
+        builder.execute(|_| ()).unwrap();
+        wtxn.commit().unwrap();
+
+        let rtxn = index.read_txn().unwrap();
+        assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap());
+        assert!(index.primary_key(&rtxn).unwrap().is_none());
+        assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap());
+        drop(rtxn);
+    }
 }