mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-12 06:24:29 +01:00
Make sure that the indexing Store only indexes searchable fields
This commit is contained in:
parent
e48630da72
commit
649fb6e401
@ -1,4 +1,5 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashSet;
|
||||
use std::fs::File;
|
||||
use std::io::{self, Seek, SeekFrom};
|
||||
use std::sync::mpsc::sync_channel;
|
||||
@ -327,6 +328,11 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
WordsPairsProximitiesDocids,
|
||||
}
|
||||
|
||||
let searchable_fields: HashSet<_> = match self.index.searchable_fields(self.wtxn)? {
|
||||
Some(fields) => fields.iter().copied().collect(),
|
||||
None => fields_ids_map.iter().map(|(id, _name)| id).collect(),
|
||||
};
|
||||
|
||||
let linked_hash_map_size = self.linked_hash_map_size;
|
||||
let max_nb_chunks = self.max_nb_chunks;
|
||||
let max_memory = self.max_memory;
|
||||
@ -354,6 +360,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
.enumerate()
|
||||
.map(|(i, documents)| {
|
||||
let store = Store::new(
|
||||
searchable_fields.clone(),
|
||||
linked_hash_map_size,
|
||||
max_nb_chunks,
|
||||
max_memory_by_job,
|
||||
|
@ -1,5 +1,5 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
use std::fs::File;
|
||||
use std::iter::FromIterator;
|
||||
@ -37,6 +37,9 @@ pub struct Readers {
|
||||
}
|
||||
|
||||
pub struct Store {
|
||||
// Indexing parameters
|
||||
searchable_fields: HashSet<u8>,
|
||||
// Caches
|
||||
word_docids: LinkedHashMap<SmallVec32<u8>, RoaringBitmap>,
|
||||
word_docids_limit: usize,
|
||||
words_pairs_proximities_docids: LinkedHashMap<(SmallVec32<u8>, SmallVec32<u8>, u8), RoaringBitmap>,
|
||||
@ -56,6 +59,7 @@ pub struct Store {
|
||||
|
||||
impl Store {
|
||||
pub fn new(
|
||||
searchable_fields: HashSet<u8>,
|
||||
linked_hash_map_size: Option<usize>,
|
||||
max_nb_chunks: Option<usize>,
|
||||
max_memory: Option<usize>,
|
||||
@ -101,18 +105,22 @@ impl Store {
|
||||
})?;
|
||||
|
||||
Ok(Store {
|
||||
// Indexing parameters.
|
||||
searchable_fields,
|
||||
// Caches
|
||||
word_docids: LinkedHashMap::with_capacity(linked_hash_map_size),
|
||||
word_docids_limit: linked_hash_map_size,
|
||||
words_pairs_proximities_docids: LinkedHashMap::with_capacity(linked_hash_map_size),
|
||||
words_pairs_proximities_docids_limit: linked_hash_map_size,
|
||||
// MTBL parameters
|
||||
chunk_compression_type,
|
||||
chunk_compression_level,
|
||||
chunk_fusing_shrink_size,
|
||||
|
||||
// MTBL sorters
|
||||
main_sorter,
|
||||
word_docids_sorter,
|
||||
words_pairs_proximities_docids_sorter,
|
||||
|
||||
// MTBL writers
|
||||
docid_word_positions_writer,
|
||||
documents_writer,
|
||||
})
|
||||
@ -309,23 +317,25 @@ impl Store {
|
||||
}
|
||||
|
||||
for (attr, content) in document.iter() {
|
||||
use serde_json::Value;
|
||||
let content: Cow<str> = match serde_json::from_slice(content) {
|
||||
Ok(string) => string,
|
||||
Err(_) => match serde_json::from_slice(content)? {
|
||||
Value::Null => continue,
|
||||
Value::Bool(boolean) => Cow::Owned(boolean.to_string()),
|
||||
Value::Number(number) => Cow::Owned(number.to_string()),
|
||||
Value::String(string) => Cow::Owned(string),
|
||||
Value::Array(_array) => continue,
|
||||
Value::Object(_object) => continue,
|
||||
}
|
||||
};
|
||||
if self.searchable_fields.contains(&attr) {
|
||||
use serde_json::Value;
|
||||
let content: Cow<str> = match serde_json::from_slice(content) {
|
||||
Ok(string) => string,
|
||||
Err(_) => match serde_json::from_slice(content)? {
|
||||
Value::Null => continue,
|
||||
Value::Bool(boolean) => Cow::Owned(boolean.to_string()),
|
||||
Value::Number(number) => Cow::Owned(number.to_string()),
|
||||
Value::String(string) => Cow::Owned(string),
|
||||
Value::Array(_array) => continue,
|
||||
Value::Object(_object) => continue,
|
||||
}
|
||||
};
|
||||
|
||||
for (pos, token) in simple_tokenizer(&content).filter_map(only_token).enumerate().take(MAX_POSITION) {
|
||||
let word = token.to_lowercase();
|
||||
let position = (attr as usize * MAX_POSITION + pos) as u32;
|
||||
words_positions.entry(word).or_insert_with(SmallVec32::new).push(position);
|
||||
for (pos, token) in simple_tokenizer(&content).filter_map(only_token).enumerate().take(MAX_POSITION) {
|
||||
let word = token.to_lowercase();
|
||||
let position = (attr as usize * MAX_POSITION + pos) as u32;
|
||||
words_positions.entry(word).or_insert_with(SmallVec32::new).push(position);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -42,6 +42,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Records a pending reset of the searchable fields on this settings update.
///
/// Stores `Some(None)`: the outer `Some` marks the setting as specified, the
/// inner `None` carries no explicit list of field names.
// NOTE(review): presumably this makes every field searchable again once the
// update is applied — confirm against the execution logic.
pub fn reset_searchable_fields(&mut self) {
|
||||
self.searchable_fields = Some(None);
|
||||
}
|
||||
|
||||
/// Records the list of field names that should become the searchable fields.
///
/// Stores `Some(Some(names))`: the outer `Some` marks the setting as
/// specified, the inner `Some` carries the field names given by the caller.
/// Nothing else is touched here; only the pending value is stored.
pub fn set_searchable_fields(&mut self, names: Vec<String>) {
|
||||
self.searchable_fields = Some(Some(names));
|
||||
}
|
||||
|
||||
/// Records a pending reset of the displayed fields on this settings update.
///
/// Stores `Some(None)` in `displayed_fields`.
// NOTE(review): presumably mirrors `reset_searchable_fields` (outer `Some` =
// specified, inner `None` = no explicit list) — confirm against the
// execution logic.
pub fn reset_displayed_fields(&mut self) {
|
||||
self.displayed_fields = Some(None);
|
||||
}
|
||||
@ -56,7 +64,6 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
{
|
||||
// Check that the searchable attributes have been specified.
|
||||
if let Some(value) = self.searchable_fields {
|
||||
let current_searchable_fields = self.index.searchable_fields(self.wtxn)?;
|
||||
let current_displayed_fields = self.index.displayed_fields(self.wtxn)?;
|
||||
let current_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
|
||||
@ -93,7 +100,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
},
|
||||
None => (
|
||||
current_fields_ids_map.clone(),
|
||||
current_searchable_fields.map(ToOwned::to_owned),
|
||||
None,
|
||||
current_displayed_fields.map(ToOwned::to_owned),
|
||||
),
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user