Faceted fields settings must specify the facet type
parent ebe7087bff
commit 466fb601d6
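
With this change, faceted fields are declared as a map from field name to facet type name rather than a plain list of names, and the type is parsed and validated when the settings update runs. Below is a minimal usage sketch, assuming a `Settings::new(&mut wtxn, &index)` constructor, a `crate::update::Settings` re-export, and that `FacetType`'s `FromStr` impl accepts names such as "string" and "integer"; none of these details are shown by this diff.

    use std::collections::HashMap;
    use heed::RwTxn;
    use crate::update::Settings;
    use crate::Index;

    // Hypothetical helper: field name -> facet type name; the strings are
    // parsed by FacetType::from_str inside Settings::execute.
    fn configure_facets(wtxn: &mut RwTxn, index: &Index) -> anyhow::Result<()> {
        let mut faceted_fields = HashMap::new();
        faceted_fields.insert("price".to_string(), "integer".to_string());
        faceted_fields.insert("brand".to_string(), "string".to_string());

        let mut builder = Settings::new(wtxn, index); // assumed constructor shape
        builder.set_faceted_fields(faceted_fields);
        builder.execute(|_step| ())?;
        Ok(())
    }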
@@ -193,7 +193,7 @@ impl Index {
     /// Writes the facet fields ids associated with their facet type or `None` if
     /// the facet type is currently unknown.
-    pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<u8, Option<FacetType>>) -> heed::Result<()> {
+    pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<u8, FacetType>) -> heed::Result<()> {
         self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields_types)
     }
 
@@ -203,7 +203,7 @@ impl Index {
     }
 
     /// Returns the facet fields ids associated with their facet type.
-    pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result<HashMap<u8, Option<FacetType>>> {
+    pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result<HashMap<u8, FacetType>> {
         Ok(self.main.get::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY)?.unwrap_or_default())
     }
 
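
The stored value is no longer `Option<FacetType>`: every faceted field id now maps to a concrete facet type. A minimal sketch of the updated `Index` API follows, assuming `FacetType` exposes variants such as `String` and `Integer` and that a write transaction is already open; both assumptions go beyond what the diff shows.

    use std::collections::HashMap;
    use heed::RwTxn;
    use crate::facet::FacetType;
    use crate::Index;

    // Hypothetical sketch: persist facet types for two field ids, then read the
    // map back; the ids and types are made up for illustration.
    fn store_and_read_facets(wtxn: &mut RwTxn, index: &Index) -> anyhow::Result<()> {
        let mut fields_types = HashMap::new();
        fields_types.insert(0u8, FacetType::String);
        fields_types.insert(1u8, FacetType::Integer);
        index.put_faceted_fields(wtxn, &fields_types)?;

        // faceted_fields now returns a HashMap<u8, FacetType> directly.
        let stored = index.faceted_fields(wtxn)?;
        assert_eq!(stored.len(), 2);
        Ok(())
    }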
@@ -61,9 +61,9 @@ pub fn obkv_to_json(
 }
 
 /// Transform a JSON value into a string that can be indexed.
-pub fn json_to_string(value: Value) -> Option<String> {
+pub fn json_to_string(value: &Value) -> Option<String> {
 
-    fn inner(value: Value, output: &mut String) -> bool {
+    fn inner(value: &Value, output: &mut String) -> bool {
         use std::fmt::Write;
         match value {
             Value::Null => false,

@@ -122,7 +122,7 @@ mod tests {
             "not_there": null,
         });
 
-        let string = json_to_string(value).unwrap();
+        let string = json_to_string(&value).unwrap();
         assert_eq!(string, "name: John Doe. age: 43. ");
     }
 

@@ -136,7 +136,7 @@ mod tests {
             null,
         ]);
 
-        let string = json_to_string(value).unwrap();
+        let string = json_to_string(&value).unwrap();
         // We don't care about having two point (.) after the other as
         // the distance of hard separators is clamped to 8 anyway.
         assert_eq!(string, "name: John Doe. . 43. hello. I. am. fine. . ");
 
@@ -329,6 +329,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
             WordDocids,
         }
 
+        let faceted_fields = self.index.faceted_fields(self.wtxn)?;
         let searchable_fields: HashSet<_> = match self.index.searchable_fields(self.wtxn)? {
             Some(fields) => fields.iter().copied().collect(),
             None => fields_ids_map.iter().map(|(id, _name)| id).collect(),

@@ -362,6 +363,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
             .map(|(i, documents)| {
                 let store = Store::new(
                     searchable_fields.clone(),
+                    faceted_fields.clone(),
                     linked_hash_map_size,
                     max_nb_chunks,
                     max_memory_by_job,
@@ -14,6 +14,7 @@ use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType};
 use roaring::RoaringBitmap;
 use tempfile::tempfile;
 
+use crate::facet::FacetType;
 use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec};
 use crate::tokenizer::{simple_tokenizer, only_token};
 use crate::update::UpdateIndexingStep;

@@ -39,6 +40,7 @@ pub struct Readers {
 pub struct Store {
     // Indexing parameters
     searchable_fields: HashSet<u8>,
+    faceted_fields: HashMap<u8, FacetType>,
     // Caches
     word_docids: LinkedHashMap<SmallVec32<u8>, RoaringBitmap>,
     word_docids_limit: usize,

@@ -60,6 +62,7 @@ pub struct Store {
 impl Store {
     pub fn new(
         searchable_fields: HashSet<u8>,
+        faceted_fields: HashMap<u8, FacetType>,
         linked_hash_map_size: Option<usize>,
         max_nb_chunks: Option<usize>,
         max_memory: Option<usize>,

@@ -107,6 +110,7 @@ impl Store {
         Ok(Store {
             // Indexing parameters.
             searchable_fields,
+            faceted_fields,
             // Caches
             word_docids: LinkedHashMap::with_capacity(linked_hash_map_size),
             word_docids_limit: linked_hash_map_size,

@@ -320,21 +324,26 @@ impl Store {
         }
 
         for (attr, content) in document.iter() {
-            if !self.searchable_fields.contains(&attr) {
-                continue;
-            }
-
-            let value = serde_json::from_slice(content)?;
-            let content = match json_to_string(value) {
-                Some(content) => content,
-                None => continue,
-            };
-
-            let tokens = simple_tokenizer(&content).filter_map(only_token);
-            for (pos, token) in tokens.enumerate().take(MAX_POSITION) {
-                let word = token.to_lowercase();
-                let position = (attr as usize * MAX_POSITION + pos) as u32;
-                words_positions.entry(word).or_insert_with(SmallVec32::new).push(position);
+            if self.faceted_fields.contains_key(&attr) || self.searchable_fields.contains(&attr) {
+                let value = serde_json::from_slice(content)?;
+
+                if let Some(ftype) = self.faceted_fields.get(&attr) {
+                    todo!("parse facet field value")
+                }
+
+                if self.searchable_fields.contains(&attr) {
+                    let content = match json_to_string(&value) {
+                        Some(content) => content,
+                        None => continue,
+                    };
+
+                    let tokens = simple_tokenizer(&content).filter_map(only_token);
+                    for (pos, token) in tokens.enumerate().take(MAX_POSITION) {
+                        let word = token.to_lowercase();
+                        let position = (attr as usize * MAX_POSITION + pos) as u32;
+                        words_positions.entry(word).or_insert_with(SmallVec32::new).push(position);
+                    }
+                }
             }
         }
 
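
The `todo!("parse facet field value")` branch is left unimplemented by this commit. Purely as an illustration of the direction, and assuming `FacetType` carries `String`, `Float` and `Integer` variants, the parsing could eventually look something like the sketch below; this is not what the commit implements.

    use anyhow::Context;
    use serde_json::Value;
    use crate::facet::FacetType;

    // Hypothetical sketch only: check that a document value matches the facet
    // type declared in the settings, failing the indexing step otherwise.
    fn check_facet_value(ftype: FacetType, value: &Value) -> anyhow::Result<()> {
        match ftype {
            FacetType::String => { value.as_str().context("expected a string facet value")?; }
            FacetType::Float => { value.as_f64().context("expected a float facet value")?; }
            FacetType::Integer => { value.as_i64().context("expected an integer facet value")?; }
        }
        Ok(())
    }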
@@ -1,11 +1,13 @@
 use std::collections::HashMap;
+use std::str::FromStr;
 
-use anyhow::Context;
+use anyhow::{ensure, Context};
 use grenad::CompressionType;
 use rayon::ThreadPool;
 
 use crate::update::index_documents::{Transform, IndexDocumentsMethod};
 use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
+use crate::facet::FacetType;
 use crate::{Index, FieldsIdsMap};
 
 pub struct Settings<'a, 't, 'u, 'i> {

@@ -24,7 +26,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
     // however if it is `Some(None)` it means that the user forced a reset of the setting.
     searchable_fields: Option<Option<Vec<String>>>,
     displayed_fields: Option<Option<Vec<String>>>,
-    faceted_fields: Option<Vec<String>>,
+    faceted_fields: Option<HashMap<String, String>>,
 }
 
 impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {

@@ -62,25 +64,29 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         self.displayed_fields = Some(Some(names));
     }
 
-    pub fn set_faceted_fields(&mut self, names: Vec<String>) {
-        self.faceted_fields = Some(names);
+    pub fn set_faceted_fields(&mut self, names_facet_types: HashMap<String, String>) {
+        self.faceted_fields = Some(names_facet_types);
     }
 
     pub fn execute<F>(self, progress_callback: F) -> anyhow::Result<()>
     where
         F: Fn(UpdateIndexingStep) + Sync
     {
-        if let Some(fields_names) = self.faceted_fields {
+        if let Some(fields_names_facet_types) = self.faceted_fields {
             let current_faceted_fields = self.index.faceted_fields(self.wtxn)?;
             let current_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
 
             let mut fields_ids_map = current_fields_ids_map.clone();
             let mut faceted_fields = HashMap::new();
-            for name in fields_names {
+            for (name, sftype) in fields_names_facet_types {
+                let ftype = FacetType::from_str(&sftype).with_context(|| format!("parsing facet type {:?}", sftype))?;
                 let id = fields_ids_map.insert(&name).context("field id limit reached")?;
                 match current_faceted_fields.get(&id) {
-                    Some(ftype) => faceted_fields.insert(id, ftype.clone()),
-                    None => faceted_fields.insert(id, None),
+                    Some(pftype) => {
+                        ensure!(ftype == *pftype, "{} facet type changed from {} to {}", name, ftype, pftype);
+                        faceted_fields.insert(id, ftype)
+                    },
+                    None => faceted_fields.insert(id, ftype),
                 };
             }
 
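
The `ensure!` call above is the behavioural core of the commit: once a field has a facet type, a later settings update can no longer change it silently. A sketch of that failure mode, under the same assumptions as the earlier `configure_facets` example (assumed `Settings::new` constructor and facet type names):

    use std::collections::HashMap;
    use heed::RwTxn;
    use crate::update::Settings;
    use crate::Index;

    // Hypothetical: "price" was previously declared as an "integer" facet, so
    // redeclaring it as a "string" facet makes the update fail instead of
    // silently changing the type.
    fn facet_type_conflict(wtxn: &mut RwTxn, index: &Index) -> anyhow::Result<()> {
        let mut conflicting = HashMap::new();
        conflicting.insert("price".to_string(), "string".to_string());

        let mut builder = Settings::new(wtxn, index); // assumed constructor shape
        builder.set_faceted_fields(conflicting);
        assert!(builder.execute(|_step| ()).is_err());
        Ok(())
    }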