mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-12 06:24:29 +01:00
Faceted fields settings must specify the facet type
This commit is contained in:
parent
ebe7087bff
commit
466fb601d6
@ -193,7 +193,7 @@ impl Index {
|
||||
|
||||
/// Writes the facet fields ids associated with their facet type or `None` if
|
||||
/// the facet type is currently unknown.
|
||||
pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<u8, Option<FacetType>>) -> heed::Result<()> {
|
||||
pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<u8, FacetType>) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields_types)
|
||||
}
|
||||
|
||||
@ -203,7 +203,7 @@ impl Index {
|
||||
}
|
||||
|
||||
/// Returns the facet fields ids associated with their facet type.
|
||||
pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result<HashMap<u8, Option<FacetType>>> {
|
||||
pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result<HashMap<u8, FacetType>> {
|
||||
Ok(self.main.get::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
|
@ -61,9 +61,9 @@ pub fn obkv_to_json(
|
||||
}
|
||||
|
||||
/// Transform a JSON value into a string that can be indexed.
|
||||
pub fn json_to_string(value: Value) -> Option<String> {
|
||||
pub fn json_to_string(value: &Value) -> Option<String> {
|
||||
|
||||
fn inner(value: Value, output: &mut String) -> bool {
|
||||
fn inner(value: &Value, output: &mut String) -> bool {
|
||||
use std::fmt::Write;
|
||||
match value {
|
||||
Value::Null => false,
|
||||
@ -122,7 +122,7 @@ mod tests {
|
||||
"not_there": null,
|
||||
});
|
||||
|
||||
let string = json_to_string(value).unwrap();
|
||||
let string = json_to_string(&value).unwrap();
|
||||
assert_eq!(string, "name: John Doe. age: 43. ");
|
||||
}
|
||||
|
||||
@ -136,7 +136,7 @@ mod tests {
|
||||
null,
|
||||
]);
|
||||
|
||||
let string = json_to_string(value).unwrap();
|
||||
let string = json_to_string(&value).unwrap();
|
||||
// We don't care about having two point (.) after the other as
|
||||
// the distance of hard separators is clamped to 8 anyway.
|
||||
assert_eq!(string, "name: John Doe. . 43. hello. I. am. fine. . ");
|
||||
|
@ -329,6 +329,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
WordDocids,
|
||||
}
|
||||
|
||||
let faceted_fields = self.index.faceted_fields(self.wtxn)?;
|
||||
let searchable_fields: HashSet<_> = match self.index.searchable_fields(self.wtxn)? {
|
||||
Some(fields) => fields.iter().copied().collect(),
|
||||
None => fields_ids_map.iter().map(|(id, _name)| id).collect(),
|
||||
@ -362,6 +363,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
.map(|(i, documents)| {
|
||||
let store = Store::new(
|
||||
searchable_fields.clone(),
|
||||
faceted_fields.clone(),
|
||||
linked_hash_map_size,
|
||||
max_nb_chunks,
|
||||
max_memory_by_job,
|
||||
|
@ -14,6 +14,7 @@ use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType};
|
||||
use roaring::RoaringBitmap;
|
||||
use tempfile::tempfile;
|
||||
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
||||
use crate::tokenizer::{simple_tokenizer, only_token};
|
||||
use crate::update::UpdateIndexingStep;
|
||||
@ -39,6 +40,7 @@ pub struct Readers {
|
||||
pub struct Store {
|
||||
// Indexing parameters
|
||||
searchable_fields: HashSet<u8>,
|
||||
faceted_fields: HashMap<u8, FacetType>,
|
||||
// Caches
|
||||
word_docids: LinkedHashMap<SmallVec32<u8>, RoaringBitmap>,
|
||||
word_docids_limit: usize,
|
||||
@ -60,6 +62,7 @@ pub struct Store {
|
||||
impl Store {
|
||||
pub fn new(
|
||||
searchable_fields: HashSet<u8>,
|
||||
faceted_fields: HashMap<u8, FacetType>,
|
||||
linked_hash_map_size: Option<usize>,
|
||||
max_nb_chunks: Option<usize>,
|
||||
max_memory: Option<usize>,
|
||||
@ -107,6 +110,7 @@ impl Store {
|
||||
Ok(Store {
|
||||
// Indexing parameters.
|
||||
searchable_fields,
|
||||
faceted_fields,
|
||||
// Caches
|
||||
word_docids: LinkedHashMap::with_capacity(linked_hash_map_size),
|
||||
word_docids_limit: linked_hash_map_size,
|
||||
@ -320,21 +324,26 @@ impl Store {
|
||||
}
|
||||
|
||||
for (attr, content) in document.iter() {
|
||||
if !self.searchable_fields.contains(&attr) {
|
||||
continue;
|
||||
}
|
||||
if self.faceted_fields.contains_key(&attr) || self.searchable_fields.contains(&attr) {
|
||||
let value = serde_json::from_slice(content)?;
|
||||
|
||||
let value = serde_json::from_slice(content)?;
|
||||
let content = match json_to_string(value) {
|
||||
Some(content) => content,
|
||||
None => continue,
|
||||
};
|
||||
if let Some(ftype) = self.faceted_fields.get(&attr) {
|
||||
todo!("parse facet field value")
|
||||
}
|
||||
|
||||
let tokens = simple_tokenizer(&content).filter_map(only_token);
|
||||
for (pos, token) in tokens.enumerate().take(MAX_POSITION) {
|
||||
let word = token.to_lowercase();
|
||||
let position = (attr as usize * MAX_POSITION + pos) as u32;
|
||||
words_positions.entry(word).or_insert_with(SmallVec32::new).push(position);
|
||||
if self.searchable_fields.contains(&attr) {
|
||||
let content = match json_to_string(&value) {
|
||||
Some(content) => content,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let tokens = simple_tokenizer(&content).filter_map(only_token);
|
||||
for (pos, token) in tokens.enumerate().take(MAX_POSITION) {
|
||||
let word = token.to_lowercase();
|
||||
let position = (attr as usize * MAX_POSITION + pos) as u32;
|
||||
words_positions.entry(word).or_insert_with(SmallVec32::new).push(position);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,11 +1,13 @@
|
||||
use std::collections::HashMap;
|
||||
use std::str::FromStr;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::{ensure, Context};
|
||||
use grenad::CompressionType;
|
||||
use rayon::ThreadPool;
|
||||
|
||||
use crate::update::index_documents::{Transform, IndexDocumentsMethod};
|
||||
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
|
||||
use crate::facet::FacetType;
|
||||
use crate::{Index, FieldsIdsMap};
|
||||
|
||||
pub struct Settings<'a, 't, 'u, 'i> {
|
||||
@ -24,7 +26,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
||||
// however if it is `Some(None)` it means that the user forced a reset of the setting.
|
||||
searchable_fields: Option<Option<Vec<String>>>,
|
||||
displayed_fields: Option<Option<Vec<String>>>,
|
||||
faceted_fields: Option<Vec<String>>,
|
||||
faceted_fields: Option<HashMap<String, String>>,
|
||||
}
|
||||
|
||||
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
@ -62,25 +64,29 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
self.displayed_fields = Some(Some(names));
|
||||
}
|
||||
|
||||
pub fn set_faceted_fields(&mut self, names: Vec<String>) {
|
||||
self.faceted_fields = Some(names);
|
||||
pub fn set_faceted_fields(&mut self, names_facet_types: HashMap<String, String>) {
|
||||
self.faceted_fields = Some(names_facet_types);
|
||||
}
|
||||
|
||||
pub fn execute<F>(self, progress_callback: F) -> anyhow::Result<()>
|
||||
where
|
||||
F: Fn(UpdateIndexingStep) + Sync
|
||||
{
|
||||
if let Some(fields_names) = self.faceted_fields {
|
||||
if let Some(fields_names_facet_types) = self.faceted_fields {
|
||||
let current_faceted_fields = self.index.faceted_fields(self.wtxn)?;
|
||||
let current_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
|
||||
let mut fields_ids_map = current_fields_ids_map.clone();
|
||||
let mut faceted_fields = HashMap::new();
|
||||
for name in fields_names {
|
||||
for (name, sftype) in fields_names_facet_types {
|
||||
let ftype = FacetType::from_str(&sftype).with_context(|| format!("parsing facet type {:?}", sftype))?;
|
||||
let id = fields_ids_map.insert(&name).context("field id limit reached")?;
|
||||
match current_faceted_fields.get(&id) {
|
||||
Some(ftype) => faceted_fields.insert(id, ftype.clone()),
|
||||
None => faceted_fields.insert(id, None),
|
||||
Some(pftype) => {
|
||||
ensure!(ftype == *pftype, "{} facet type changed from {} to {}", name, ftype, pftype);
|
||||
faceted_fields.insert(id, ftype)
|
||||
},
|
||||
None => faceted_fields.insert(id, ftype),
|
||||
};
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user