mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 13:24:27 +01:00
Make sure that the indexing Store only index searchable fields
This commit is contained in:
parent
e48630da72
commit
649fb6e401
@ -1,4 +1,5 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
use std::collections::HashSet;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{self, Seek, SeekFrom};
|
use std::io::{self, Seek, SeekFrom};
|
||||||
use std::sync::mpsc::sync_channel;
|
use std::sync::mpsc::sync_channel;
|
||||||
@ -327,6 +328,11 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
WordsPairsProximitiesDocids,
|
WordsPairsProximitiesDocids,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let searchable_fields: HashSet<_> = match self.index.searchable_fields(self.wtxn)? {
|
||||||
|
Some(fields) => fields.iter().copied().collect(),
|
||||||
|
None => fields_ids_map.iter().map(|(id, _name)| id).collect(),
|
||||||
|
};
|
||||||
|
|
||||||
let linked_hash_map_size = self.linked_hash_map_size;
|
let linked_hash_map_size = self.linked_hash_map_size;
|
||||||
let max_nb_chunks = self.max_nb_chunks;
|
let max_nb_chunks = self.max_nb_chunks;
|
||||||
let max_memory = self.max_memory;
|
let max_memory = self.max_memory;
|
||||||
@ -354,6 +360,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
.enumerate()
|
.enumerate()
|
||||||
.map(|(i, documents)| {
|
.map(|(i, documents)| {
|
||||||
let store = Store::new(
|
let store = Store::new(
|
||||||
|
searchable_fields.clone(),
|
||||||
linked_hash_map_size,
|
linked_hash_map_size,
|
||||||
max_nb_chunks,
|
max_nb_chunks,
|
||||||
max_memory_by_job,
|
max_memory_by_job,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::{BTreeMap, HashMap};
|
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||||
use std::convert::{TryFrom, TryInto};
|
use std::convert::{TryFrom, TryInto};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::iter::FromIterator;
|
use std::iter::FromIterator;
|
||||||
@ -37,6 +37,9 @@ pub struct Readers {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct Store {
|
pub struct Store {
|
||||||
|
// Indexing parameters
|
||||||
|
searchable_fields: HashSet<u8>,
|
||||||
|
// Caches
|
||||||
word_docids: LinkedHashMap<SmallVec32<u8>, RoaringBitmap>,
|
word_docids: LinkedHashMap<SmallVec32<u8>, RoaringBitmap>,
|
||||||
word_docids_limit: usize,
|
word_docids_limit: usize,
|
||||||
words_pairs_proximities_docids: LinkedHashMap<(SmallVec32<u8>, SmallVec32<u8>, u8), RoaringBitmap>,
|
words_pairs_proximities_docids: LinkedHashMap<(SmallVec32<u8>, SmallVec32<u8>, u8), RoaringBitmap>,
|
||||||
@ -56,6 +59,7 @@ pub struct Store {
|
|||||||
|
|
||||||
impl Store {
|
impl Store {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
|
searchable_fields: HashSet<u8>,
|
||||||
linked_hash_map_size: Option<usize>,
|
linked_hash_map_size: Option<usize>,
|
||||||
max_nb_chunks: Option<usize>,
|
max_nb_chunks: Option<usize>,
|
||||||
max_memory: Option<usize>,
|
max_memory: Option<usize>,
|
||||||
@ -101,18 +105,22 @@ impl Store {
|
|||||||
})?;
|
})?;
|
||||||
|
|
||||||
Ok(Store {
|
Ok(Store {
|
||||||
|
// Indexing parameters.
|
||||||
|
searchable_fields,
|
||||||
|
// Caches
|
||||||
word_docids: LinkedHashMap::with_capacity(linked_hash_map_size),
|
word_docids: LinkedHashMap::with_capacity(linked_hash_map_size),
|
||||||
word_docids_limit: linked_hash_map_size,
|
word_docids_limit: linked_hash_map_size,
|
||||||
words_pairs_proximities_docids: LinkedHashMap::with_capacity(linked_hash_map_size),
|
words_pairs_proximities_docids: LinkedHashMap::with_capacity(linked_hash_map_size),
|
||||||
words_pairs_proximities_docids_limit: linked_hash_map_size,
|
words_pairs_proximities_docids_limit: linked_hash_map_size,
|
||||||
|
// MTBL parameters
|
||||||
chunk_compression_type,
|
chunk_compression_type,
|
||||||
chunk_compression_level,
|
chunk_compression_level,
|
||||||
chunk_fusing_shrink_size,
|
chunk_fusing_shrink_size,
|
||||||
|
// MTBL sorters
|
||||||
main_sorter,
|
main_sorter,
|
||||||
word_docids_sorter,
|
word_docids_sorter,
|
||||||
words_pairs_proximities_docids_sorter,
|
words_pairs_proximities_docids_sorter,
|
||||||
|
// MTBL writers
|
||||||
docid_word_positions_writer,
|
docid_word_positions_writer,
|
||||||
documents_writer,
|
documents_writer,
|
||||||
})
|
})
|
||||||
@ -309,23 +317,25 @@ impl Store {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (attr, content) in document.iter() {
|
for (attr, content) in document.iter() {
|
||||||
use serde_json::Value;
|
if self.searchable_fields.contains(&attr) {
|
||||||
let content: Cow<str> = match serde_json::from_slice(content) {
|
use serde_json::Value;
|
||||||
Ok(string) => string,
|
let content: Cow<str> = match serde_json::from_slice(content) {
|
||||||
Err(_) => match serde_json::from_slice(content)? {
|
Ok(string) => string,
|
||||||
Value::Null => continue,
|
Err(_) => match serde_json::from_slice(content)? {
|
||||||
Value::Bool(boolean) => Cow::Owned(boolean.to_string()),
|
Value::Null => continue,
|
||||||
Value::Number(number) => Cow::Owned(number.to_string()),
|
Value::Bool(boolean) => Cow::Owned(boolean.to_string()),
|
||||||
Value::String(string) => Cow::Owned(string),
|
Value::Number(number) => Cow::Owned(number.to_string()),
|
||||||
Value::Array(_array) => continue,
|
Value::String(string) => Cow::Owned(string),
|
||||||
Value::Object(_object) => continue,
|
Value::Array(_array) => continue,
|
||||||
}
|
Value::Object(_object) => continue,
|
||||||
};
|
}
|
||||||
|
};
|
||||||
|
|
||||||
for (pos, token) in simple_tokenizer(&content).filter_map(only_token).enumerate().take(MAX_POSITION) {
|
for (pos, token) in simple_tokenizer(&content).filter_map(only_token).enumerate().take(MAX_POSITION) {
|
||||||
let word = token.to_lowercase();
|
let word = token.to_lowercase();
|
||||||
let position = (attr as usize * MAX_POSITION + pos) as u32;
|
let position = (attr as usize * MAX_POSITION + pos) as u32;
|
||||||
words_positions.entry(word).or_insert_with(SmallVec32::new).push(position);
|
words_positions.entry(word).or_insert_with(SmallVec32::new).push(position);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,6 +42,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn reset_searchable_fields(&mut self) {
|
||||||
|
self.searchable_fields = Some(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_searchable_fields(&mut self, names: Vec<String>) {
|
||||||
|
self.searchable_fields = Some(Some(names));
|
||||||
|
}
|
||||||
|
|
||||||
pub fn reset_displayed_fields(&mut self) {
|
pub fn reset_displayed_fields(&mut self) {
|
||||||
self.displayed_fields = Some(None);
|
self.displayed_fields = Some(None);
|
||||||
}
|
}
|
||||||
@ -56,7 +64,6 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
{
|
{
|
||||||
// Check that the searchable attributes have been specified.
|
// Check that the searchable attributes have been specified.
|
||||||
if let Some(value) = self.searchable_fields {
|
if let Some(value) = self.searchable_fields {
|
||||||
let current_searchable_fields = self.index.searchable_fields(self.wtxn)?;
|
|
||||||
let current_displayed_fields = self.index.displayed_fields(self.wtxn)?;
|
let current_displayed_fields = self.index.displayed_fields(self.wtxn)?;
|
||||||
let current_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
let current_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||||
|
|
||||||
@ -93,7 +100,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
},
|
},
|
||||||
None => (
|
None => (
|
||||||
current_fields_ids_map.clone(),
|
current_fields_ids_map.clone(),
|
||||||
current_searchable_fields.map(ToOwned::to_owned),
|
None,
|
||||||
current_displayed_fields.map(ToOwned::to_owned),
|
current_displayed_fields.map(ToOwned::to_owned),
|
||||||
),
|
),
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user