mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 05:00:06 +01:00
Stops returning an option in the internal searchable fields
This commit is contained in:
parent
76bb6d565c
commit
c22460045c
28
milli/src/fieldids_weights_map.rs
Normal file
28
milli/src/fieldids_weights_map.rs
Normal file
@ -0,0 +1,28 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{FieldId, Weight};
|
||||
|
||||
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||
pub struct FieldidsWeightsMap {
|
||||
map: HashMap<FieldId, Weight>,
|
||||
}
|
||||
|
||||
impl FieldidsWeightsMap {
|
||||
pub fn insert(&mut self, fid: FieldId, weight: Weight) -> Option<Weight> {
|
||||
self.map.insert(fid, weight)
|
||||
}
|
||||
|
||||
pub fn remove(&mut self, fid: FieldId) -> Option<Weight> {
|
||||
self.map.remove(&fid)
|
||||
}
|
||||
|
||||
pub fn weight(&self, fid: FieldId) -> Option<Weight> {
|
||||
self.map.get(&fid).copied()
|
||||
}
|
||||
|
||||
pub fn max_weight(&self) -> Option<Weight> {
|
||||
self.map.values().copied().max()
|
||||
}
|
||||
}
|
@ -1,5 +1,6 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||
use std::convert::TryInto;
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
|
||||
@ -25,8 +26,9 @@ use crate::proximity::ProximityPrecision;
|
||||
use crate::vector::EmbeddingConfig;
|
||||
use crate::{
|
||||
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
||||
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
|
||||
Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32, BEU64,
|
||||
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, FieldidsWeightsMap,
|
||||
GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec,
|
||||
BEU16, BEU32, BEU64,
|
||||
};
|
||||
|
||||
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
|
||||
@ -42,6 +44,7 @@ pub mod main_key {
|
||||
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
|
||||
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
|
||||
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
||||
pub const FIELDIDS_WEIGHTS_MAP_KEY: &str = "fieldids-weights-map";
|
||||
pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
|
||||
pub const GEO_RTREE_KEY: &str = "geo-rtree";
|
||||
pub const PRIMARY_KEY_KEY: &str = "primary-key";
|
||||
@ -414,6 +417,32 @@ impl Index {
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
/* fieldids weights map */
|
||||
// This maps the fields ids to their weights.
|
||||
// Their weights is defined by the ordering of the searchable attributes.
|
||||
|
||||
/// Writes the fieldids weights map which associates the field ids to their weights
|
||||
pub(crate) fn put_fieldids_weights_map(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
map: &FieldidsWeightsMap,
|
||||
) -> heed::Result<()> {
|
||||
self.main.remap_types::<Str, SerdeJson<_>>().put(
|
||||
wtxn,
|
||||
main_key::FIELDIDS_WEIGHTS_MAP_KEY,
|
||||
map,
|
||||
)
|
||||
}
|
||||
|
||||
/// Get the fieldids weights map which associates the field ids to their weights
|
||||
pub fn fieldids_weights_map(&self, rtxn: &RoTxn) -> heed::Result<FieldidsWeightsMap> {
|
||||
Ok(self
|
||||
.main
|
||||
.remap_types::<Str, SerdeJson<_>>()
|
||||
.get(rtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
/* geo rtree */
|
||||
|
||||
/// Writes the provided `rtree` which associates coordinates to documents ids.
|
||||
@ -578,10 +607,12 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
user_fields: &[&str],
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
) -> heed::Result<()> {
|
||||
) -> Result<()> {
|
||||
// We can write the user defined searchable fields as-is.
|
||||
self.put_user_defined_searchable_fields(wtxn, user_fields)?;
|
||||
|
||||
let mut weights = self.fieldids_weights_map(&wtxn)?;
|
||||
|
||||
// Now we generate the real searchable fields:
|
||||
// 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion.
|
||||
// 2. Iterate over the user defined searchable fields.
|
||||
@ -589,17 +620,23 @@ impl Index {
|
||||
// (ie doggo.name is a subset of doggo) then we push it at the end of the fields.
|
||||
let mut real_fields = user_fields.to_vec();
|
||||
|
||||
for field_from_map in fields_ids_map.names() {
|
||||
for user_field in user_fields {
|
||||
for (id, field_from_map) in fields_ids_map.iter() {
|
||||
for (weight, user_field) in user_fields.iter().enumerate() {
|
||||
if crate::is_faceted_by(field_from_map, user_field)
|
||||
&& !user_fields.contains(&field_from_map)
|
||||
{
|
||||
real_fields.push(field_from_map);
|
||||
|
||||
let weight: u16 =
|
||||
weight.try_into().map_err(|_| UserError::AttributeLimitReached)?;
|
||||
weights.insert(id, weight as u16);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.put_searchable_fields(wtxn, &real_fields)
|
||||
self.put_searchable_fields(wtxn, &real_fields)?;
|
||||
self.put_fieldids_weights_map(wtxn, &weights)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn delete_all_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
@ -623,28 +660,31 @@ impl Index {
|
||||
}
|
||||
|
||||
/// Returns the searchable fields, those are the fields that are indexed,
|
||||
/// if the searchable fields aren't there it means that **all** the fields are indexed.
|
||||
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
|
||||
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Vec<Cow<'t, str>>> {
|
||||
self.main
|
||||
.remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
|
||||
.get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)
|
||||
.get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)?
|
||||
.map(|fields| Ok(fields.into_iter().map(|field| Cow::Borrowed(field)).collect()))
|
||||
.unwrap_or_else(|| {
|
||||
Ok(self
|
||||
.fields_ids_map(rtxn)?
|
||||
.names()
|
||||
.map(|field| Cow::Owned(field.to_string()))
|
||||
.collect())
|
||||
})
|
||||
}
|
||||
|
||||
/// Identical to `searchable_fields`, but returns the ids instead.
|
||||
pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Option<Vec<FieldId>>> {
|
||||
match self.searchable_fields(rtxn)? {
|
||||
Some(fields) => {
|
||||
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||
let mut fields_ids = Vec::new();
|
||||
for name in fields {
|
||||
if let Some(field_id) = fields_ids_map.id(name) {
|
||||
fields_ids.push(field_id);
|
||||
}
|
||||
}
|
||||
Ok(Some(fields_ids))
|
||||
pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Vec<FieldId>> {
|
||||
let fields = self.searchable_fields(rtxn)?;
|
||||
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||
let mut fields_ids = Vec::new();
|
||||
for name in fields {
|
||||
if let Some(field_id) = fields_ids_map.id(&name) {
|
||||
fields_ids.push(field_id);
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
Ok(fields_ids)
|
||||
}
|
||||
|
||||
/// Writes the searchable fields, when this list is specified, only these are indexed.
|
||||
@ -1710,10 +1750,14 @@ pub(crate) mod tests {
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, field_distribution, 1);
|
||||
db_snap!(index, field_distribution, @r###"
|
||||
age 1 |
|
||||
id 2 |
|
||||
name 2 |
|
||||
"###);
|
||||
|
||||
db_snap!(index, word_docids,
|
||||
@r###"
|
||||
@r###"
|
||||
1 [0, ]
|
||||
2 [1, ]
|
||||
20 [1, ]
|
||||
@ -1722,18 +1766,6 @@ pub(crate) mod tests {
|
||||
"###
|
||||
);
|
||||
|
||||
db_snap!(index, field_distribution);
|
||||
|
||||
db_snap!(index, field_distribution,
|
||||
@r###"
|
||||
age 1 |
|
||||
id 2 |
|
||||
name 2 |
|
||||
"###
|
||||
);
|
||||
|
||||
// snapshot_index!(&index, "1", include: "^field_distribution$");
|
||||
|
||||
// we add all the documents a second time. we are supposed to get the same
|
||||
// field_distribution in the end
|
||||
index
|
||||
@ -1820,7 +1852,7 @@ pub(crate) mod tests {
|
||||
// ensure we get the right real searchable fields + user defined searchable fields
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
|
||||
let real = index.searchable_fields(&rtxn).unwrap();
|
||||
assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);
|
||||
|
||||
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
|
||||
@ -1840,7 +1872,7 @@ pub(crate) mod tests {
|
||||
// ensure we get the right real searchable fields + user defined searchable fields
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
|
||||
let real = index.searchable_fields(&rtxn).unwrap();
|
||||
assert_eq!(real, &["doggo", "name"]);
|
||||
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
|
||||
assert_eq!(user_defined, &["doggo", "name"]);
|
||||
@ -1856,7 +1888,7 @@ pub(crate) mod tests {
|
||||
// ensure we get the right real searchable fields + user defined searchable fields
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
|
||||
let real = index.searchable_fields(&rtxn).unwrap();
|
||||
assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);
|
||||
|
||||
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
|
||||
|
@ -28,6 +28,7 @@ pub mod vector;
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
pub mod snapshot_tests;
|
||||
mod fieldids_weights_map;
|
||||
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
@ -52,6 +53,7 @@ pub use self::error::{
|
||||
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
|
||||
};
|
||||
pub use self::external_documents_ids::ExternalDocumentsIds;
|
||||
pub use self::fieldids_weights_map::FieldidsWeightsMap;
|
||||
pub use self::fields_ids_map::FieldsIdsMap;
|
||||
pub use self::heed_codec::{
|
||||
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
|
||||
@ -77,6 +79,7 @@ pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
||||
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
|
||||
pub type FieldDistribution = BTreeMap<String, u64>;
|
||||
pub type FieldId = u16;
|
||||
pub type Weight = u16;
|
||||
pub type Object = serde_json::Map<String, serde_json::Value>;
|
||||
pub type Position = u32;
|
||||
pub type RelativePosition = u16;
|
||||
|
@ -315,11 +315,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
.map_err(heed::Error::Decoding)?
|
||||
} else {
|
||||
// Compute the distance at the attribute level and store it in the cache.
|
||||
let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
|
||||
fids
|
||||
} else {
|
||||
self.index.fields_ids_map(self.txn)?.ids().collect()
|
||||
};
|
||||
let fids = self.index.searchable_fields_ids(self.txn)?;
|
||||
let mut docids = RoaringBitmap::new();
|
||||
for fid in fids {
|
||||
// for each field, intersect left word bitmap and right word bitmap,
|
||||
@ -408,11 +404,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
let prefix_docids = match proximity_precision {
|
||||
ProximityPrecision::ByAttribute => {
|
||||
// Compute the distance at the attribute level and store it in the cache.
|
||||
let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
|
||||
fids
|
||||
} else {
|
||||
self.index.fields_ids_map(self.txn)?.ids().collect()
|
||||
};
|
||||
let fids = self.index.searchable_fields_ids(self.txn)?;
|
||||
let mut prefix_docids = RoaringBitmap::new();
|
||||
// for each field, intersect left word bitmap and right word bitmap,
|
||||
// then merge the result in a global bitmap before storing it in the cache.
|
||||
|
@ -184,13 +184,7 @@ impl State {
|
||||
return Ok(State::Empty(query_graph.clone()));
|
||||
}
|
||||
|
||||
let searchable_fields_ids = {
|
||||
if let Some(fids) = ctx.index.searchable_fields_ids(ctx.txn)? {
|
||||
fids
|
||||
} else {
|
||||
ctx.index.fields_ids_map(ctx.txn)?.ids().collect()
|
||||
}
|
||||
};
|
||||
let searchable_fields_ids = ctx.index.searchable_fields_ids(ctx.txn)?;
|
||||
|
||||
let mut candidates_per_attribute = Vec::with_capacity(searchable_fields_ids.len());
|
||||
// then check that there exists at least one attribute that has all of the terms
|
||||
|
@ -96,27 +96,22 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
contains_wildcard = true;
|
||||
continue;
|
||||
}
|
||||
let searchable_contains_name =
|
||||
searchable_names.as_ref().map(|sn| sn.iter().any(|name| name == field_name));
|
||||
let searchable_contains_name = searchable_names.iter().any(|name| name == field_name);
|
||||
let fid = match (fids_map.id(field_name), searchable_contains_name) {
|
||||
// The Field id exist and the field is searchable
|
||||
(Some(fid), Some(true)) | (Some(fid), None) => fid,
|
||||
(Some(fid), true) => fid,
|
||||
// The field is searchable but the Field id doesn't exist => Internal Error
|
||||
(None, Some(true)) => {
|
||||
(None, true) => {
|
||||
return Err(FieldIdMapMissingEntry::FieldName {
|
||||
field_name: field_name.to_string(),
|
||||
process: "search",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
// The field is not searchable, but the searchableAttributes are set to * => ignore field
|
||||
(None, None) => continue,
|
||||
// The field is not searchable => User error
|
||||
(_fid, Some(false)) => {
|
||||
let (valid_fields, hidden_fields) = match searchable_names {
|
||||
Some(sn) => self.index.remove_hidden_fields(self.txn, sn)?,
|
||||
None => self.index.remove_hidden_fields(self.txn, fids_map.names())?,
|
||||
};
|
||||
(_fid, false) => {
|
||||
let (valid_fields, hidden_fields) =
|
||||
self.index.remove_hidden_fields(self.txn, searchable_names)?;
|
||||
|
||||
let field = field_name.to_string();
|
||||
return Err(UserError::InvalidSearchableAttribute {
|
||||
|
@ -77,17 +77,7 @@ impl RankingRuleGraphTrait for FidGraph {
|
||||
}
|
||||
|
||||
// always lookup the max_fid if we don't already and add an artificial condition for max scoring
|
||||
let max_fid: Option<u16> = {
|
||||
if let Some(max_fid) = ctx
|
||||
.index
|
||||
.searchable_fields_ids(ctx.txn)?
|
||||
.map(|field_ids| field_ids.into_iter().max())
|
||||
{
|
||||
max_fid
|
||||
} else {
|
||||
ctx.index.fields_ids_map(ctx.txn)?.ids().max()
|
||||
}
|
||||
};
|
||||
let max_fid: Option<u16> = ctx.index.searchable_fields_ids(ctx.txn)?.into_iter().max();
|
||||
|
||||
if let Some(max_fid) = max_fid {
|
||||
if !all_fields.contains(&max_fid) {
|
||||
|
@ -186,7 +186,7 @@ fn searchable_fields_changed(
|
||||
) -> bool {
|
||||
let searchable_fields = &settings_diff.new.searchable_fields_ids;
|
||||
for (field_id, field_bytes) in obkv.iter() {
|
||||
if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
|
||||
if searchable_fields.contains(&field_id) {
|
||||
let del_add = KvReaderDelAdd::new(field_bytes);
|
||||
match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
|
||||
// if both fields are None, check the next field.
|
||||
@ -298,7 +298,7 @@ fn lang_safe_tokens_from_document<'a>(
|
||||
/// Extract words mapped with their positions of a document.
|
||||
fn tokens_from_document<'a>(
|
||||
obkv: &KvReader<FieldId>,
|
||||
searchable_fields: &Option<Vec<FieldId>>,
|
||||
searchable_fields: &[FieldId],
|
||||
tokenizer: &Tokenizer,
|
||||
max_positions_per_attributes: u32,
|
||||
del_add: DelAdd,
|
||||
@ -309,7 +309,7 @@ fn tokens_from_document<'a>(
|
||||
let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
|
||||
for (field_id, field_bytes) in obkv.iter() {
|
||||
// if field is searchable.
|
||||
if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
|
||||
if searchable_fields.as_ref().contains(&field_id) {
|
||||
// extract deletion or addition only.
|
||||
if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) {
|
||||
// parse json.
|
||||
|
@ -468,14 +468,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
Setting::Set(ref fields) => {
|
||||
// Check to see if the searchable fields changed before doing anything else
|
||||
let old_fields = self.index.searchable_fields(self.wtxn)?;
|
||||
let did_change = match old_fields {
|
||||
// If old_fields is Some, let's check to see if the fields actually changed
|
||||
Some(old_fields) => {
|
||||
let new_fields = fields.iter().map(String::as_str).collect::<Vec<_>>();
|
||||
new_fields != old_fields
|
||||
}
|
||||
// If old_fields is None, the fields have changed (because they are being set)
|
||||
None => true,
|
||||
let did_change = {
|
||||
let new_fields = fields.iter().map(String::as_str).collect::<Vec<_>>();
|
||||
new_fields != old_fields
|
||||
};
|
||||
if !did_change {
|
||||
return Ok(false);
|
||||
@ -1172,7 +1167,7 @@ pub(crate) struct InnerIndexSettings {
|
||||
pub user_defined_faceted_fields: HashSet<String>,
|
||||
pub user_defined_searchable_fields: Option<Vec<String>>,
|
||||
pub faceted_fields_ids: HashSet<FieldId>,
|
||||
pub searchable_fields_ids: Option<Vec<FieldId>>,
|
||||
pub searchable_fields_ids: Vec<FieldId>,
|
||||
pub exact_attributes: HashSet<FieldId>,
|
||||
pub proximity_precision: ProximityPrecision,
|
||||
pub embedding_configs: EmbeddingConfigs,
|
||||
@ -1517,6 +1512,7 @@ mod tests {
|
||||
use big_s::S;
|
||||
use heed::types::Bytes;
|
||||
use maplit::{btreemap, btreeset, hashset};
|
||||
use meili_snap::snapshot;
|
||||
|
||||
use super::*;
|
||||
use crate::error::Error;
|
||||
@ -1576,7 +1572,7 @@ mod tests {
|
||||
// Check that the searchable field have been reset and documents are found now.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let searchable_fields = index.searchable_fields(&rtxn).unwrap();
|
||||
assert_eq!(searchable_fields, None);
|
||||
snapshot!(format!("{searchable_fields:?}"), @r###"["name", "id", "age"]"###);
|
||||
let result = index.search(&rtxn).query("23").execute().unwrap();
|
||||
assert_eq!(result.documents_ids.len(), 1);
|
||||
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
|
||||
|
Loading…
x
Reference in New Issue
Block a user