mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-10 21:44:34 +01:00
Merge #4631
4631: Split the field id map from the weight of each field r=Kerollmops a=irevoire # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/4484 ## What does this PR do? - Make the (internal) searchable fields database always contain the searchable fields (instead of None when the user-defined searchable fields were not defined) - Introduce a new « fieldids_weights_map » that does the mapping between a fieldId and its Weight - Ensure that when two searchable fields are swapped, the field ID map doesn't change anymore (and thus, doesn't re-index) - Use the weight instead of the order of the searchable fields in the attribute ranking rule at search time - When no searchable attributes are defined, make the weights of all fields equal to zero - When a field is declared as searchable and contains nested fields, all its subfields share the same weight ## Impact on relevancy ### When no searchable attributes are declared When no searchable attributes are declared, all the fields have the same importance instead of randomly giving more importance to the field we've encountered « the earliest » in the life of the index. This means that before this PR, sending the following JSON: ```json [ { "id": 0, "name": "kefir", "color": "white" }, { "id": 1, "name": "white", "last name": "spirit" } ] ``` would make the field `name` more important than the field `color` or `last name`. This means that searching for `white` would make the document `1` automatically higher ranked than the document `0`. After this PR, all the fields have the same weight, and none are considered more important than others. 
### When a nested field is made searchable The second behavior change introduced by this PR shows up when you send a document such as this one: ```json { "id": 0, "name": "tamo", "doggo": { "name": "kefir", "surname": "le kef" }, "catto": "gromez" } ``` Previously, defining the searchable attributes as: `["tamo", "doggo", "catto"]` was actually defining the « real » searchable attributes in the engine as: `["tamo", "doggo", "catto", "doggo.name", "doggo.surname"]`, which means that `doggo.name` and `doggo.surname` were _NOT_ where the user expected them and had completely different weights than `doggo`. In this PR all the weights have been unified, and the « real » searchable fields look like this: ```json [ "tamo", "doggo", "doggo.name", "doggo.surname", "catto"] ^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^ Weight 0 Weight 1 Weight 2 ``` That is, `tamo` has weight 0, `doggo`, `doggo.name` and `doggo.surname` share weight 1, and `catto` has weight 2. Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
7c19c072fa
@ -272,9 +272,9 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
|
|||||||
}
|
}
|
||||||
for index_uid in index_uids {
|
for index_uid in index_uids {
|
||||||
if index_uid == swap.0 {
|
if index_uid == swap.0 {
|
||||||
*index_uid = swap.1.to_owned();
|
swap.1.clone_into(index_uid);
|
||||||
} else if index_uid == swap.1 {
|
} else if index_uid == swap.1 {
|
||||||
*index_uid = swap.0.to_owned();
|
swap.0.clone_into(index_uid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -730,7 +730,7 @@ pub fn perform_search(
|
|||||||
let mut ids = BTreeSet::new();
|
let mut ids = BTreeSet::new();
|
||||||
for attr in attrs {
|
for attr in attrs {
|
||||||
if attr == "*" {
|
if attr == "*" {
|
||||||
ids = displayed_ids.clone();
|
ids.clone_from(&displayed_ids);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -85,8 +85,13 @@ impl SearchQueue {
|
|||||||
},
|
},
|
||||||
|
|
||||||
search_request = receive_new_searches.recv() => {
|
search_request = receive_new_searches.recv() => {
|
||||||
// this unwrap is safe because we're sure the `SearchQueue` still lives somewhere in actix-web
|
let search_request = match search_request {
|
||||||
let search_request = search_request.unwrap();
|
Some(search_request) => search_request,
|
||||||
|
// This should never happen while actix-web is running, but it's not a reason to crash
|
||||||
|
// and it can generate a lot of noise in the tests.
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
|
||||||
if searches_running < usize::from(parallelism) && queue.is_empty() {
|
if searches_running < usize::from(parallelism) && queue.is_empty() {
|
||||||
searches_running += 1;
|
searches_running += 1;
|
||||||
// if the search requests die it's not a hard error on our side
|
// if the search requests die it's not a hard error on our side
|
||||||
|
@ -85,8 +85,8 @@ async fn simple_search() {
|
|||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.996969696969697},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.996969696969697},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
|
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
|
||||||
snapshot!(response["semanticHitCount"], @"1");
|
snapshot!(response["semanticHitCount"], @"2");
|
||||||
|
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(
|
.search_post(
|
||||||
@ -331,7 +331,7 @@ async fn query_combination() {
|
|||||||
.await;
|
.await;
|
||||||
|
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.996969696969697},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.996969696969697},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.8848484848484849}]"###);
|
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9242424242424242}]"###);
|
||||||
snapshot!(response["semanticHitCount"], @"null");
|
snapshot!(response["semanticHitCount"], @"null");
|
||||||
|
|
||||||
// query + vector, no hybrid keyword =>
|
// query + vector, no hybrid keyword =>
|
||||||
@ -374,6 +374,6 @@ async fn query_combination() {
|
|||||||
.await;
|
.await;
|
||||||
|
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848}]"###);
|
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9242424242424242}]"###);
|
||||||
snapshot!(response["semanticHitCount"], @"0");
|
snapshot!(response["semanticHitCount"], @"0");
|
||||||
}
|
}
|
||||||
|
@ -921,7 +921,7 @@ async fn test_score_details() {
|
|||||||
"order": 3,
|
"order": 3,
|
||||||
"attributeRankingOrderScore": 1.0,
|
"attributeRankingOrderScore": 1.0,
|
||||||
"queryWordDistanceScore": 0.8095238095238095,
|
"queryWordDistanceScore": 0.8095238095238095,
|
||||||
"score": 0.9727891156462584
|
"score": 0.8095238095238095
|
||||||
},
|
},
|
||||||
"exactness": {
|
"exactness": {
|
||||||
"order": 4,
|
"order": 4,
|
||||||
|
@ -285,10 +285,10 @@ async fn attributes_ranking_rule_order() {
|
|||||||
@r###"
|
@r###"
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"id": "2"
|
"id": "1"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "1"
|
"id": "2"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
"###
|
"###
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use actix_rt::time::sleep;
|
|
||||||
use meili_snap::{json_string, snapshot};
|
use meili_snap::{json_string, snapshot};
|
||||||
use meilisearch::option::ScheduleSnapshot;
|
use meilisearch::option::ScheduleSnapshot;
|
||||||
use meilisearch::Opt;
|
use meilisearch::Opt;
|
||||||
@ -53,11 +52,29 @@ async fn perform_snapshot() {
|
|||||||
|
|
||||||
index.load_test_set().await;
|
index.load_test_set().await;
|
||||||
|
|
||||||
server.index("test1").create(Some("prim")).await;
|
let (task, code) = server.index("test1").create(Some("prim")).await;
|
||||||
|
meili_snap::snapshot!(code, @"202 Accepted");
|
||||||
|
|
||||||
index.wait_task(2).await;
|
index.wait_task(task.uid()).await;
|
||||||
|
|
||||||
sleep(Duration::from_secs(2)).await;
|
// wait for the _next task_ to process, aka the snapshot that should be enqueued at some point
|
||||||
|
|
||||||
|
println!("waited for the next task to finish");
|
||||||
|
let now = std::time::Instant::now();
|
||||||
|
let next_task = task.uid() + 1;
|
||||||
|
loop {
|
||||||
|
let (value, code) = index.get_task(next_task).await;
|
||||||
|
dbg!(&value);
|
||||||
|
if code != 404 && value["status"].as_str() == Some("succeeded") {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if now.elapsed() > Duration::from_secs(30) {
|
||||||
|
panic!("The snapshot didn't schedule in 30s even though it was supposed to be scheduled every 2s: {}",
|
||||||
|
serde_json::to_string_pretty(&value).unwrap()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let temp = tempfile::tempdir().unwrap();
|
let temp = tempfile::tempdir().unwrap();
|
||||||
|
|
||||||
|
@ -48,7 +48,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
|
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
let mut ctx = SearchContext::new(&index, &txn);
|
let mut ctx = SearchContext::new(&index, &txn)?;
|
||||||
let universe = filtered_universe(&ctx, &None)?;
|
let universe = filtered_universe(&ctx, &None)?;
|
||||||
|
|
||||||
let docs = execute_search(
|
let docs = execute_search(
|
||||||
|
@ -32,6 +32,8 @@ pub enum InternalError {
|
|||||||
DatabaseClosing,
|
DatabaseClosing,
|
||||||
#[error("Missing {} in the {db_name} database.", key.unwrap_or("key"))]
|
#[error("Missing {} in the {db_name} database.", key.unwrap_or("key"))]
|
||||||
DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> },
|
DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> },
|
||||||
|
#[error("Missing {key} in the fieldids weights mapping.")]
|
||||||
|
FieldidsWeightsMapMissingEntry { key: FieldId },
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
FieldIdMapMissingEntry(#[from] FieldIdMapMissingEntry),
|
FieldIdMapMissingEntry(#[from] FieldIdMapMissingEntry),
|
||||||
#[error("Missing {key} in the field id mapping.")]
|
#[error("Missing {key} in the field id mapping.")]
|
||||||
|
48
milli/src/fieldids_weights_map.rs
Normal file
48
milli/src/fieldids_weights_map.rs
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
//! The fieldids weights map is in charge of storing the link between the searchable fields and their weights.
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::{FieldId, FieldsIdsMap, Weight};
|
||||||
|
|
||||||
|
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||||
|
pub struct FieldidsWeightsMap {
|
||||||
|
map: HashMap<FieldId, Weight>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FieldidsWeightsMap {
|
||||||
|
/// Insert a field id -> weight into the map.
|
||||||
|
/// If the map did not have this key present, `None` is returned.
|
||||||
|
/// If the map did have this key present, the value is updated, and the old value is returned.
|
||||||
|
pub fn insert(&mut self, fid: FieldId, weight: Weight) -> Option<Weight> {
|
||||||
|
self.map.insert(fid, weight)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create the map from the fields ids maps.
|
||||||
|
/// Should only be called in the case there are NO searchable attributes.
|
||||||
|
/// All the fields will be inserted in the order of the fields ids map with a weight of 0.
|
||||||
|
pub fn from_field_id_map_without_searchable(fid_map: &FieldsIdsMap) -> Self {
|
||||||
|
FieldidsWeightsMap { map: fid_map.ids().map(|fid| (fid, 0)).collect() }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Removes a field id from the map, returning the associated weight previously in the map.
|
||||||
|
pub fn remove(&mut self, fid: FieldId) -> Option<Weight> {
|
||||||
|
self.map.remove(&fid)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns weight corresponding to the key.
|
||||||
|
pub fn weight(&self, fid: FieldId) -> Option<Weight> {
|
||||||
|
self.map.get(&fid).copied()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns highest weight contained in the map if any.
|
||||||
|
pub fn max_weight(&self) -> Option<Weight> {
|
||||||
|
self.map.values().copied().max()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return an iterator visiting all field ids in arbitrary order.
|
||||||
|
pub fn ids(&self) -> impl Iterator<Item = FieldId> + '_ {
|
||||||
|
self.map.keys().copied()
|
||||||
|
}
|
||||||
|
}
|
@ -1,5 +1,6 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||||
|
use std::convert::TryInto;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
@ -25,8 +26,9 @@ use crate::proximity::ProximityPrecision;
|
|||||||
use crate::vector::EmbeddingConfig;
|
use crate::vector::EmbeddingConfig;
|
||||||
use crate::{
|
use crate::{
|
||||||
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
||||||
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
|
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
|
||||||
Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32, BEU64,
|
FieldidsWeightsMap, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
|
||||||
|
Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
|
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
|
||||||
@ -42,6 +44,7 @@ pub mod main_key {
|
|||||||
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
|
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
|
||||||
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
|
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
|
||||||
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
||||||
|
pub const FIELDIDS_WEIGHTS_MAP_KEY: &str = "fieldids-weights-map";
|
||||||
pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
|
pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
|
||||||
pub const GEO_RTREE_KEY: &str = "geo-rtree";
|
pub const GEO_RTREE_KEY: &str = "geo-rtree";
|
||||||
pub const PRIMARY_KEY_KEY: &str = "primary-key";
|
pub const PRIMARY_KEY_KEY: &str = "primary-key";
|
||||||
@ -414,6 +417,65 @@ impl Index {
|
|||||||
.unwrap_or_default())
|
.unwrap_or_default())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* fieldids weights map */
|
||||||
|
// This maps the fields ids to their weights.
|
||||||
|
// Their weights are defined by the ordering of the searchable attributes.
|
||||||
|
|
||||||
|
/// Writes the fieldids weights map which associates the field ids to their weights
|
||||||
|
pub(crate) fn put_fieldids_weights_map(
|
||||||
|
&self,
|
||||||
|
wtxn: &mut RwTxn,
|
||||||
|
map: &FieldidsWeightsMap,
|
||||||
|
) -> heed::Result<()> {
|
||||||
|
self.main.remap_types::<Str, SerdeJson<_>>().put(
|
||||||
|
wtxn,
|
||||||
|
main_key::FIELDIDS_WEIGHTS_MAP_KEY,
|
||||||
|
map,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the fieldids weights map which associates the field ids to their weights
|
||||||
|
pub fn fieldids_weights_map(&self, rtxn: &RoTxn) -> heed::Result<FieldidsWeightsMap> {
|
||||||
|
self.main
|
||||||
|
.remap_types::<Str, SerdeJson<_>>()
|
||||||
|
.get(rtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)?
|
||||||
|
.map(Ok)
|
||||||
|
.unwrap_or_else(|| {
|
||||||
|
Ok(FieldidsWeightsMap::from_field_id_map_without_searchable(
|
||||||
|
&self.fields_ids_map(rtxn)?,
|
||||||
|
))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Delete the fieldids weights map
|
||||||
|
pub fn delete_fieldids_weights_map(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
|
self.main.remap_key_type::<Str>().delete(wtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn searchable_fields_and_weights<'a>(
|
||||||
|
&self,
|
||||||
|
rtxn: &'a RoTxn,
|
||||||
|
) -> Result<Vec<(Cow<'a, str>, FieldId, Weight)>> {
|
||||||
|
let fid_map = self.fields_ids_map(rtxn)?;
|
||||||
|
let weight_map = self.fieldids_weights_map(rtxn)?;
|
||||||
|
let searchable = self.searchable_fields(rtxn)?;
|
||||||
|
|
||||||
|
searchable
|
||||||
|
.into_iter()
|
||||||
|
.map(|field| -> Result<_> {
|
||||||
|
let fid = fid_map.id(&field).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
|
||||||
|
field_name: field.to_string(),
|
||||||
|
process: "searchable_fields_and_weights",
|
||||||
|
})?;
|
||||||
|
let weight = weight_map
|
||||||
|
.weight(fid)
|
||||||
|
.ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
|
||||||
|
|
||||||
|
Ok((field, fid, weight))
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
/* geo rtree */
|
/* geo rtree */
|
||||||
|
|
||||||
/// Writes the provided `rtree` which associates coordinates to documents ids.
|
/// Writes the provided `rtree` which associates coordinates to documents ids.
|
||||||
@ -578,33 +640,42 @@ impl Index {
|
|||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
user_fields: &[&str],
|
user_fields: &[&str],
|
||||||
fields_ids_map: &FieldsIdsMap,
|
fields_ids_map: &FieldsIdsMap,
|
||||||
) -> heed::Result<()> {
|
) -> Result<()> {
|
||||||
// We can write the user defined searchable fields as-is.
|
// We can write the user defined searchable fields as-is.
|
||||||
self.put_user_defined_searchable_fields(wtxn, user_fields)?;
|
self.put_user_defined_searchable_fields(wtxn, user_fields)?;
|
||||||
|
|
||||||
|
let mut weights = FieldidsWeightsMap::default();
|
||||||
|
|
||||||
// Now we generate the real searchable fields:
|
// Now we generate the real searchable fields:
|
||||||
// 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion.
|
// 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion.
|
||||||
// 2. Iterate over the user defined searchable fields.
|
// 2. Iterate over the user defined searchable fields.
|
||||||
// 3. If a user defined field is a subset of a field defined in the fields_ids_map
|
// 3. If a user defined field is a subset of a field defined in the fields_ids_map
|
||||||
// (ie doggo.name is a subset of doggo) then we push it at the end of the fields.
|
// (ie doggo.name is a subset of doggo) then we push it with the same weight as doggo.
|
||||||
let mut real_fields = user_fields.to_vec();
|
let mut real_fields = Vec::new();
|
||||||
|
|
||||||
for field_from_map in fields_ids_map.names() {
|
for (id, field_from_map) in fields_ids_map.iter() {
|
||||||
for user_field in user_fields {
|
for (weight, user_field) in user_fields.iter().enumerate() {
|
||||||
if crate::is_faceted_by(field_from_map, user_field)
|
if crate::is_faceted_by(field_from_map, user_field)
|
||||||
&& !user_fields.contains(&field_from_map)
|
&& !real_fields.contains(&field_from_map)
|
||||||
{
|
{
|
||||||
real_fields.push(field_from_map);
|
real_fields.push(field_from_map);
|
||||||
|
|
||||||
|
let weight: u16 =
|
||||||
|
weight.try_into().map_err(|_| UserError::AttributeLimitReached)?;
|
||||||
|
weights.insert(id, weight);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
self.put_searchable_fields(wtxn, &real_fields)
|
self.put_searchable_fields(wtxn, &real_fields)?;
|
||||||
|
self.put_fieldids_weights_map(wtxn, &weights)?;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn delete_all_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub(crate) fn delete_all_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
let did_delete_searchable = self.delete_searchable_fields(wtxn)?;
|
let did_delete_searchable = self.delete_searchable_fields(wtxn)?;
|
||||||
let did_delete_user_defined = self.delete_user_defined_searchable_fields(wtxn)?;
|
let did_delete_user_defined = self.delete_user_defined_searchable_fields(wtxn)?;
|
||||||
|
self.delete_fieldids_weights_map(wtxn)?;
|
||||||
Ok(did_delete_searchable || did_delete_user_defined)
|
Ok(did_delete_searchable || did_delete_user_defined)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -623,28 +694,31 @@ impl Index {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the searchable fields, those are the fields that are indexed,
|
/// Returns the searchable fields, those are the fields that are indexed,
|
||||||
/// if the searchable fields aren't there it means that **all** the fields are indexed.
|
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Vec<Cow<'t, str>>> {
|
||||||
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
|
|
||||||
self.main
|
self.main
|
||||||
.remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
|
.remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
|
||||||
.get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)
|
.get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)?
|
||||||
|
.map(|fields| Ok(fields.into_iter().map(Cow::Borrowed).collect()))
|
||||||
|
.unwrap_or_else(|| {
|
||||||
|
Ok(self
|
||||||
|
.fields_ids_map(rtxn)?
|
||||||
|
.names()
|
||||||
|
.map(|field| Cow::Owned(field.to_string()))
|
||||||
|
.collect())
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Identical to `searchable_fields`, but returns the ids instead.
|
/// Identical to `searchable_fields`, but returns the ids instead.
|
||||||
pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Option<Vec<FieldId>>> {
|
pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Vec<FieldId>> {
|
||||||
match self.searchable_fields(rtxn)? {
|
let fields = self.searchable_fields(rtxn)?;
|
||||||
Some(fields) => {
|
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||||
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
let mut fields_ids = Vec::new();
|
||||||
let mut fields_ids = Vec::new();
|
for name in fields {
|
||||||
for name in fields {
|
if let Some(field_id) = fields_ids_map.id(&name) {
|
||||||
if let Some(field_id) = fields_ids_map.id(name) {
|
fields_ids.push(field_id);
|
||||||
fields_ids.push(field_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(Some(fields_ids))
|
|
||||||
}
|
}
|
||||||
None => Ok(None),
|
|
||||||
}
|
}
|
||||||
|
Ok(fields_ids)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Writes the searchable fields, when this list is specified, only these are indexed.
|
/// Writes the searchable fields, when this list is specified, only these are indexed.
|
||||||
@ -1710,10 +1784,14 @@ pub(crate) mod tests {
|
|||||||
]))
|
]))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
db_snap!(index, field_distribution, 1);
|
db_snap!(index, field_distribution, @r###"
|
||||||
|
age 1 |
|
||||||
|
id 2 |
|
||||||
|
name 2 |
|
||||||
|
"###);
|
||||||
|
|
||||||
db_snap!(index, word_docids,
|
db_snap!(index, word_docids,
|
||||||
@r###"
|
@r###"
|
||||||
1 [0, ]
|
1 [0, ]
|
||||||
2 [1, ]
|
2 [1, ]
|
||||||
20 [1, ]
|
20 [1, ]
|
||||||
@ -1722,18 +1800,6 @@ pub(crate) mod tests {
|
|||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
|
|
||||||
db_snap!(index, field_distribution);
|
|
||||||
|
|
||||||
db_snap!(index, field_distribution,
|
|
||||||
@r###"
|
|
||||||
age 1 |
|
|
||||||
id 2 |
|
|
||||||
name 2 |
|
|
||||||
"###
|
|
||||||
);
|
|
||||||
|
|
||||||
// snapshot_index!(&index, "1", include: "^field_distribution$");
|
|
||||||
|
|
||||||
// we add all the documents a second time. we are supposed to get the same
|
// we add all the documents a second time. we are supposed to get the same
|
||||||
// field_distribution in the end
|
// field_distribution in the end
|
||||||
index
|
index
|
||||||
@ -1820,7 +1886,7 @@ pub(crate) mod tests {
|
|||||||
// ensure we get the right real searchable fields + user defined searchable fields
|
// ensure we get the right real searchable fields + user defined searchable fields
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
|
let real = index.searchable_fields(&rtxn).unwrap();
|
||||||
assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);
|
assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);
|
||||||
|
|
||||||
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
|
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
|
||||||
@ -1840,7 +1906,7 @@ pub(crate) mod tests {
|
|||||||
// ensure we get the right real searchable fields + user defined searchable fields
|
// ensure we get the right real searchable fields + user defined searchable fields
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
|
let real = index.searchable_fields(&rtxn).unwrap();
|
||||||
assert_eq!(real, &["doggo", "name"]);
|
assert_eq!(real, &["doggo", "name"]);
|
||||||
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
|
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
|
||||||
assert_eq!(user_defined, &["doggo", "name"]);
|
assert_eq!(user_defined, &["doggo", "name"]);
|
||||||
@ -1856,7 +1922,7 @@ pub(crate) mod tests {
|
|||||||
// ensure we get the right real searchable fields + user defined searchable fields
|
// ensure we get the right real searchable fields + user defined searchable fields
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
|
let real = index.searchable_fields(&rtxn).unwrap();
|
||||||
assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);
|
assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);
|
||||||
|
|
||||||
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
|
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
|
||||||
@ -2395,6 +2461,14 @@ pub(crate) mod tests {
|
|||||||
11 0
|
11 0
|
||||||
4 1
|
4 1
|
||||||
"###);
|
"###);
|
||||||
|
db_snap!(index, fields_ids_map, @r###"
|
||||||
|
0 primary_key |
|
||||||
|
"###);
|
||||||
|
db_snap!(index, searchable_fields, @r###"["primary_key"]"###);
|
||||||
|
db_snap!(index, fieldids_weights_map, @r###"
|
||||||
|
fid weight
|
||||||
|
0 0 |
|
||||||
|
"###);
|
||||||
|
|
||||||
index
|
index
|
||||||
.add_documents(documents!([
|
.add_documents(documents!([
|
||||||
@ -2410,6 +2484,16 @@ pub(crate) mod tests {
|
|||||||
11 0
|
11 0
|
||||||
4 1
|
4 1
|
||||||
"###);
|
"###);
|
||||||
|
db_snap!(index, fields_ids_map, @r###"
|
||||||
|
0 primary_key |
|
||||||
|
1 a |
|
||||||
|
"###);
|
||||||
|
db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
|
||||||
|
db_snap!(index, fieldids_weights_map, @r###"
|
||||||
|
fid weight
|
||||||
|
0 0 |
|
||||||
|
1 0 |
|
||||||
|
"###);
|
||||||
|
|
||||||
index.delete_documents(Default::default());
|
index.delete_documents(Default::default());
|
||||||
|
|
||||||
@ -2420,6 +2504,16 @@ pub(crate) mod tests {
|
|||||||
11 0
|
11 0
|
||||||
4 1
|
4 1
|
||||||
"###);
|
"###);
|
||||||
|
db_snap!(index, fields_ids_map, @r###"
|
||||||
|
0 primary_key |
|
||||||
|
1 a |
|
||||||
|
"###);
|
||||||
|
db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
|
||||||
|
db_snap!(index, fieldids_weights_map, @r###"
|
||||||
|
fid weight
|
||||||
|
0 0 |
|
||||||
|
1 0 |
|
||||||
|
"###);
|
||||||
|
|
||||||
index
|
index
|
||||||
.add_documents(documents!([
|
.add_documents(documents!([
|
||||||
@ -2435,6 +2529,16 @@ pub(crate) mod tests {
|
|||||||
11 0
|
11 0
|
||||||
4 1
|
4 1
|
||||||
"###);
|
"###);
|
||||||
|
db_snap!(index, fields_ids_map, @r###"
|
||||||
|
0 primary_key |
|
||||||
|
1 a |
|
||||||
|
"###);
|
||||||
|
db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
|
||||||
|
db_snap!(index, fieldids_weights_map, @r###"
|
||||||
|
fid weight
|
||||||
|
0 0 |
|
||||||
|
1 0 |
|
||||||
|
"###);
|
||||||
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
let search = Search::new(&rtxn, &index);
|
let search = Search::new(&rtxn, &index);
|
||||||
@ -2520,4 +2624,104 @@ pub(crate) mod tests {
|
|||||||
|
|
||||||
db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted
|
db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn swapping_searchable_attributes() {
|
||||||
|
// See https://github.com/meilisearch/meilisearch/issues/4484
|
||||||
|
|
||||||
|
let index = TempIndex::new();
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|settings| {
|
||||||
|
settings.set_searchable_fields(vec![S("name")]);
|
||||||
|
settings.set_filterable_fields(HashSet::from([S("age")]));
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
index
|
||||||
|
.add_documents(documents!({ "id": 1, "name": "Many", "age": 28, "realName": "Maxime" }))
|
||||||
|
.unwrap();
|
||||||
|
db_snap!(index, fields_ids_map, @r###"
|
||||||
|
0 name |
|
||||||
|
1 id |
|
||||||
|
2 age |
|
||||||
|
3 realName |
|
||||||
|
"###);
|
||||||
|
db_snap!(index, searchable_fields, @r###"["name"]"###);
|
||||||
|
db_snap!(index, fieldids_weights_map, @r###"
|
||||||
|
fid weight
|
||||||
|
0 0 |
|
||||||
|
"###);
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|settings| {
|
||||||
|
settings.set_searchable_fields(vec![S("name"), S("realName")]);
|
||||||
|
settings.set_filterable_fields(HashSet::from([S("age")]));
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// The order of the field id map shouldn't change
|
||||||
|
db_snap!(index, fields_ids_map, @r###"
|
||||||
|
0 name |
|
||||||
|
1 id |
|
||||||
|
2 age |
|
||||||
|
3 realName |
|
||||||
|
"###);
|
||||||
|
db_snap!(index, searchable_fields, @r###"["name", "realName"]"###);
|
||||||
|
db_snap!(index, fieldids_weights_map, @r###"
|
||||||
|
fid weight
|
||||||
|
0 0 |
|
||||||
|
3 1 |
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks that swapping the order of the searchable attributes, without re-adding
// any document, reverses the ranking of two documents matching the same word in
// different attributes.
#[test]
|
||||||
|
fn attribute_weights_after_swapping_searchable_attributes() {
|
||||||
|
// See https://github.com/meilisearch/meilisearch/issues/4484
|
||||||
|
|
||||||
|
let index = TempIndex::new();
|
||||||
|
|
||||||
|
// "name" is listed before "beverage" in the searchable fields.
index
|
||||||
|
.update_settings(|settings| {
|
||||||
|
settings.set_searchable_fields(vec![S("name"), S("beverage")]);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// "kefir" appears in "name" for document 0 and in "beverage" for document 1.
index
|
||||||
|
.add_documents(documents!([
|
||||||
|
{ "id": 0, "name": "kefir", "beverage": "water" },
|
||||||
|
{ "id": 1, "name": "tamo", "beverage": "kefir" }
|
||||||
|
]))
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut search = index.search(&rtxn);
|
||||||
|
let results = search.query("kefir").execute().unwrap();
|
||||||
|
|
||||||
|
// We should find kefir the dog first: document 0 matches in "name", the first searchable field
|
||||||
|
insta::assert_debug_snapshot!(results.documents_ids, @r###"
|
||||||
|
[
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// Swap the two searchable attributes; the documents are not re-added.
index
|
||||||
|
.update_settings(|settings| {
|
||||||
|
settings.set_searchable_fields(vec![S("beverage"), S("name")]);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Open a fresh read transaction so the search sees the updated settings.
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut search = index.search(&rtxn);
|
||||||
|
let results = search.query("kefir").execute().unwrap();
|
||||||
|
|
||||||
|
// We should find tamo first: document 1 matches in "beverage", now the first searchable field
|
||||||
|
insta::assert_debug_snapshot!(results.documents_ids, @r###"
|
||||||
|
[
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -28,6 +28,7 @@ pub mod vector;
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
pub mod snapshot_tests;
|
pub mod snapshot_tests;
|
||||||
|
mod fieldids_weights_map;
|
||||||
|
|
||||||
use std::collections::{BTreeMap, HashMap};
|
use std::collections::{BTreeMap, HashMap};
|
||||||
use std::convert::{TryFrom, TryInto};
|
use std::convert::{TryFrom, TryInto};
|
||||||
@ -52,6 +53,7 @@ pub use self::error::{
|
|||||||
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
|
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
|
||||||
};
|
};
|
||||||
pub use self::external_documents_ids::ExternalDocumentsIds;
|
pub use self::external_documents_ids::ExternalDocumentsIds;
|
||||||
|
pub use self::fieldids_weights_map::FieldidsWeightsMap;
|
||||||
pub use self::fields_ids_map::FieldsIdsMap;
|
pub use self::fields_ids_map::FieldsIdsMap;
|
||||||
pub use self::heed_codec::{
|
pub use self::heed_codec::{
|
||||||
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
|
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
|
||||||
@ -77,6 +79,7 @@ pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
|||||||
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
|
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
|
||||||
pub type FieldDistribution = BTreeMap<String, u64>;
|
pub type FieldDistribution = BTreeMap<String, u64>;
|
||||||
pub type FieldId = u16;
|
pub type FieldId = u16;
|
||||||
|
/// Weight attached to a searchable field; looked up per `FieldId` through `FieldidsWeightsMap`.
pub type Weight = u16;
|
||||||
pub type Object = serde_json::Map<String, serde_json::Value>;
|
pub type Object = serde_json::Map<String, serde_json::Value>;
|
||||||
pub type Position = u32;
|
pub type Position = u32;
|
||||||
pub type RelativePosition = u16;
|
pub type RelativePosition = u16;
|
||||||
|
@ -147,7 +147,7 @@ impl<'a> Search<'a> {
|
|||||||
|
|
||||||
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
|
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
|
||||||
if has_vector_search {
|
if has_vector_search {
|
||||||
let ctx = SearchContext::new(self.index, self.rtxn);
|
let ctx = SearchContext::new(self.index, self.rtxn)?;
|
||||||
filtered_universe(&ctx, &self.filter)
|
filtered_universe(&ctx, &self.filter)
|
||||||
} else {
|
} else {
|
||||||
Ok(self.execute()?.candidates)
|
Ok(self.execute()?.candidates)
|
||||||
@ -155,10 +155,10 @@ impl<'a> Search<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(&self) -> Result<SearchResult> {
|
pub fn execute(&self) -> Result<SearchResult> {
|
||||||
let mut ctx = SearchContext::new(self.index, self.rtxn);
|
let mut ctx = SearchContext::new(self.index, self.rtxn)?;
|
||||||
|
|
||||||
if let Some(searchable_attributes) = self.searchable_attributes {
|
if let Some(searchable_attributes) = self.searchable_attributes {
|
||||||
ctx.searchable_attributes(searchable_attributes)?;
|
ctx.attributes_to_search_on(searchable_attributes)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let universe = filtered_universe(&ctx, &self.filter)?;
|
let universe = filtered_universe(&ctx, &self.filter)?;
|
||||||
|
@ -101,7 +101,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
|
|
||||||
let mut ranking_rule_universes: Vec<RoaringBitmap> =
|
let mut ranking_rule_universes: Vec<RoaringBitmap> =
|
||||||
vec![RoaringBitmap::default(); ranking_rules_len];
|
vec![RoaringBitmap::default(); ranking_rules_len];
|
||||||
ranking_rule_universes[0] = universe.clone();
|
ranking_rule_universes[0].clone_from(universe);
|
||||||
let mut cur_ranking_rule_index = 0;
|
let mut cur_ranking_rule_index = 0;
|
||||||
|
|
||||||
/// Finish iterating over the current ranking rule, yielding
|
/// Finish iterating over the current ranking rule, yielding
|
||||||
@ -232,7 +232,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
cur_ranking_rule_index += 1;
|
cur_ranking_rule_index += 1;
|
||||||
ranking_rule_universes[cur_ranking_rule_index] = next_bucket.candidates.clone();
|
ranking_rule_universes[cur_ranking_rule_index].clone_from(&next_bucket.candidates);
|
||||||
logger.start_iteration_ranking_rule(
|
logger.start_iteration_ranking_rule(
|
||||||
cur_ranking_rule_index,
|
cur_ranking_rule_index,
|
||||||
ranking_rules[cur_ranking_rule_index].as_ref(),
|
ranking_rules[cur_ranking_rule_index].as_ref(),
|
||||||
|
@ -163,7 +163,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
Some(restricted_fids) => {
|
Some(restricted_fids) => {
|
||||||
let interned = self.word_interner.get(word).as_str();
|
let interned = self.word_interner.get(word).as_str();
|
||||||
let keys: Vec<_> =
|
let keys: Vec<_> =
|
||||||
restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();
|
restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
|
||||||
|
|
||||||
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
@ -192,7 +192,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
Some(restricted_fids) => {
|
Some(restricted_fids) => {
|
||||||
let interned = self.word_interner.get(word).as_str();
|
let interned = self.word_interner.get(word).as_str();
|
||||||
let keys: Vec<_> =
|
let keys: Vec<_> =
|
||||||
restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();
|
restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();
|
||||||
|
|
||||||
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
@ -242,7 +242,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
Some(restricted_fids) => {
|
Some(restricted_fids) => {
|
||||||
let interned = self.word_interner.get(prefix).as_str();
|
let interned = self.word_interner.get(prefix).as_str();
|
||||||
let keys: Vec<_> =
|
let keys: Vec<_> =
|
||||||
restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();
|
restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
|
||||||
|
|
||||||
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
@ -271,7 +271,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
Some(restricted_fids) => {
|
Some(restricted_fids) => {
|
||||||
let interned = self.word_interner.get(prefix).as_str();
|
let interned = self.word_interner.get(prefix).as_str();
|
||||||
let keys: Vec<_> =
|
let keys: Vec<_> =
|
||||||
restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();
|
restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();
|
||||||
|
|
||||||
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
@ -315,11 +315,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
.map_err(heed::Error::Decoding)?
|
.map_err(heed::Error::Decoding)?
|
||||||
} else {
|
} else {
|
||||||
// Compute the distance at the attribute level and store it in the cache.
|
// Compute the distance at the attribute level and store it in the cache.
|
||||||
let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
|
let fids = self.index.searchable_fields_ids(self.txn)?;
|
||||||
fids
|
|
||||||
} else {
|
|
||||||
self.index.fields_ids_map(self.txn)?.ids().collect()
|
|
||||||
};
|
|
||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
for fid in fids {
|
for fid in fids {
|
||||||
// for each field, intersect left word bitmap and right word bitmap,
|
// for each field, intersect left word bitmap and right word bitmap,
|
||||||
@ -408,11 +404,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
let prefix_docids = match proximity_precision {
|
let prefix_docids = match proximity_precision {
|
||||||
ProximityPrecision::ByAttribute => {
|
ProximityPrecision::ByAttribute => {
|
||||||
// Compute the distance at the attribute level and store it in the cache.
|
// Compute the distance at the attribute level and store it in the cache.
|
||||||
let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
|
let fids = self.index.searchable_fields_ids(self.txn)?;
|
||||||
fids
|
|
||||||
} else {
|
|
||||||
self.index.fields_ids_map(self.txn)?.ids().collect()
|
|
||||||
};
|
|
||||||
let mut prefix_docids = RoaringBitmap::new();
|
let mut prefix_docids = RoaringBitmap::new();
|
||||||
// for each field, intersect left word bitmap and right word bitmap,
|
// for each field, intersect left word bitmap and right word bitmap,
|
||||||
// then merge the result in a global bitmap before storing it in the cache.
|
// then merge the result in a global bitmap before storing it in the cache.
|
||||||
|
@ -184,13 +184,7 @@ impl State {
|
|||||||
return Ok(State::Empty(query_graph.clone()));
|
return Ok(State::Empty(query_graph.clone()));
|
||||||
}
|
}
|
||||||
|
|
||||||
let searchable_fields_ids = {
|
let searchable_fields_ids = ctx.index.searchable_fields_ids(ctx.txn)?;
|
||||||
if let Some(fids) = ctx.index.searchable_fields_ids(ctx.txn)? {
|
|
||||||
fids
|
|
||||||
} else {
|
|
||||||
ctx.index.fields_ids_map(ctx.txn)?.ids().collect()
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut candidates_per_attribute = Vec::with_capacity(searchable_fields_ids.len());
|
let mut candidates_per_attribute = Vec::with_capacity(searchable_fields_ids.len());
|
||||||
// then check that there exists at least one attribute that has all of the terms
|
// then check that there exists at least one attribute that has all of the terms
|
||||||
|
@ -258,7 +258,7 @@ pub(crate) mod tests {
|
|||||||
fn matching_words() {
|
fn matching_words() {
|
||||||
let temp_index = temp_index_with_documents();
|
let temp_index = temp_index_with_documents();
|
||||||
let rtxn = temp_index.read_txn().unwrap();
|
let rtxn = temp_index.read_txn().unwrap();
|
||||||
let mut ctx = SearchContext::new(&temp_index, &rtxn);
|
let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap();
|
||||||
let mut builder = TokenizerBuilder::default();
|
let mut builder = TokenizerBuilder::default();
|
||||||
let tokenizer = builder.build();
|
let tokenizer = builder.build();
|
||||||
let tokens = tokenizer.tokenize("split this world");
|
let tokens = tokenizer.tokenize("split this world");
|
||||||
|
@ -506,7 +506,7 @@ mod tests {
|
|||||||
|
|
||||||
impl<'a> MatcherBuilder<'a> {
|
impl<'a> MatcherBuilder<'a> {
|
||||||
fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
|
fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
|
||||||
let mut ctx = SearchContext::new(index, rtxn);
|
let mut ctx = SearchContext::new(index, rtxn).unwrap();
|
||||||
let universe = filtered_universe(&ctx, &None).unwrap();
|
let universe = filtered_universe(&ctx, &None).unwrap();
|
||||||
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
|
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
|
@ -49,13 +49,12 @@ pub use self::geo_sort::Strategy as GeoSortStrategy;
|
|||||||
use self::graph_based_ranking_rule::Words;
|
use self::graph_based_ranking_rule::Words;
|
||||||
use self::interner::Interned;
|
use self::interner::Interned;
|
||||||
use self::vector_sort::VectorSort;
|
use self::vector_sort::VectorSort;
|
||||||
use crate::error::FieldIdMapMissingEntry;
|
|
||||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||||
use crate::search::new::distinct::apply_distinct_rule;
|
use crate::search::new::distinct::apply_distinct_rule;
|
||||||
use crate::vector::Embedder;
|
use crate::vector::Embedder;
|
||||||
use crate::{
|
use crate::{
|
||||||
AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
|
AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
|
||||||
UserError,
|
UserError, Weight,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// A structure used throughout the execution of a search query.
|
/// A structure used throughout the execution of a search query.
|
||||||
@ -71,8 +70,21 @@ pub struct SearchContext<'ctx> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'ctx> SearchContext<'ctx> {
|
impl<'ctx> SearchContext<'ctx> {
|
||||||
pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Self {
|
pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Result<Self> {
|
||||||
Self {
|
let searchable_fids = index.searchable_fields_and_weights(txn)?;
|
||||||
|
let exact_attributes_ids = index.exact_attributes_ids(txn)?;
|
||||||
|
|
||||||
|
let mut exact = Vec::new();
|
||||||
|
let mut tolerant = Vec::new();
|
||||||
|
for (_name, fid, weight) in searchable_fids {
|
||||||
|
if exact_attributes_ids.contains(&fid) {
|
||||||
|
exact.push((fid, weight));
|
||||||
|
} else {
|
||||||
|
tolerant.push((fid, weight));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
index,
|
index,
|
||||||
txn,
|
txn,
|
||||||
db_cache: <_>::default(),
|
db_cache: <_>::default(),
|
||||||
@ -81,42 +93,39 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
term_interner: <_>::default(),
|
term_interner: <_>::default(),
|
||||||
phrase_docids: <_>::default(),
|
phrase_docids: <_>::default(),
|
||||||
restricted_fids: None,
|
restricted_fids: None,
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> {
|
pub fn attributes_to_search_on(
|
||||||
let fids_map = self.index.fields_ids_map(self.txn)?;
|
&mut self,
|
||||||
let searchable_names = self.index.searchable_fields(self.txn)?;
|
attributes_to_search_on: &'ctx [String],
|
||||||
|
) -> Result<()> {
|
||||||
|
let user_defined_searchable = self.index.user_defined_searchable_fields(self.txn)?;
|
||||||
|
let searchable_fields_weights = self.index.searchable_fields_and_weights(self.txn)?;
|
||||||
let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;
|
let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;
|
||||||
|
|
||||||
|
let mut wildcard = false;
|
||||||
|
|
||||||
let mut restricted_fids = RestrictedFids::default();
|
let mut restricted_fids = RestrictedFids::default();
|
||||||
let mut contains_wildcard = false;
|
for field_name in attributes_to_search_on {
|
||||||
for field_name in searchable_attributes {
|
|
||||||
if field_name == "*" {
|
if field_name == "*" {
|
||||||
contains_wildcard = true;
|
wildcard = true;
|
||||||
|
// we cannot exit early as we want to return an error in case of unknown fields
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let searchable_contains_name =
|
let searchable_weight =
|
||||||
searchable_names.as_ref().map(|sn| sn.iter().any(|name| name == field_name));
|
searchable_fields_weights.iter().find(|(name, _, _)| name == field_name);
|
||||||
let fid = match (fids_map.id(field_name), searchable_contains_name) {
|
let (fid, weight) = match searchable_weight {
|
||||||
// The Field id exist and the field is searchable
|
// The Field id exist and the field is searchable
|
||||||
(Some(fid), Some(true)) | (Some(fid), None) => fid,
|
Some((_name, fid, weight)) => (*fid, *weight),
|
||||||
// The field is searchable but the Field id doesn't exist => Internal Error
|
// The field is not searchable but the user didn't define any searchable attributes
|
||||||
(None, Some(true)) => {
|
None if user_defined_searchable.is_none() => continue,
|
||||||
return Err(FieldIdMapMissingEntry::FieldName {
|
|
||||||
field_name: field_name.to_string(),
|
|
||||||
process: "search",
|
|
||||||
}
|
|
||||||
.into())
|
|
||||||
}
|
|
||||||
// The field is not searchable, but the searchableAttributes are set to * => ignore field
|
|
||||||
(None, None) => continue,
|
|
||||||
// The field is not searchable => User error
|
// The field is not searchable => User error
|
||||||
(_fid, Some(false)) => {
|
None => {
|
||||||
let (valid_fields, hidden_fields) = match searchable_names {
|
let (valid_fields, hidden_fields) = self.index.remove_hidden_fields(
|
||||||
Some(sn) => self.index.remove_hidden_fields(self.txn, sn)?,
|
self.txn,
|
||||||
None => self.index.remove_hidden_fields(self.txn, fids_map.names())?,
|
searchable_fields_weights.iter().map(|(name, _, _)| name),
|
||||||
};
|
)?;
|
||||||
|
|
||||||
let field = field_name.to_string();
|
let field = field_name.to_string();
|
||||||
return Err(UserError::InvalidSearchableAttribute {
|
return Err(UserError::InvalidSearchableAttribute {
|
||||||
@ -129,13 +138,17 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
if exact_attributes_ids.contains(&fid) {
|
if exact_attributes_ids.contains(&fid) {
|
||||||
restricted_fids.exact.push(fid);
|
restricted_fids.exact.push((fid, weight));
|
||||||
} else {
|
} else {
|
||||||
restricted_fids.tolerant.push(fid);
|
restricted_fids.tolerant.push((fid, weight));
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
self.restricted_fids = (!contains_wildcard).then_some(restricted_fids);
|
if wildcard {
|
||||||
|
self.restricted_fids = None;
|
||||||
|
} else {
|
||||||
|
self.restricted_fids = Some(restricted_fids);
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -158,13 +171,13 @@ impl Word {
|
|||||||
|
|
||||||
#[derive(Debug, Clone, Default)]
|
#[derive(Debug, Clone, Default)]
|
||||||
pub struct RestrictedFids {
|
pub struct RestrictedFids {
|
||||||
pub tolerant: Vec<FieldId>,
|
pub tolerant: Vec<(FieldId, Weight)>,
|
||||||
pub exact: Vec<FieldId>,
|
pub exact: Vec<(FieldId, Weight)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RestrictedFids {
|
impl RestrictedFids {
|
||||||
pub fn contains(&self, fid: &FieldId) -> bool {
|
pub fn contains(&self, fid: &FieldId) -> bool {
|
||||||
self.tolerant.contains(fid) || self.exact.contains(fid)
|
self.tolerant.iter().any(|(id, _)| id == fid) || self.exact.iter().any(|(id, _)| id == fid)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -366,7 +366,7 @@ mod tests {
|
|||||||
let tokens = tokenizer.tokenize(".");
|
let tokens = tokenizer.tokenize(".");
|
||||||
let index = temp_index_with_documents();
|
let index = temp_index_with_documents();
|
||||||
let rtxn = index.read_txn()?;
|
let rtxn = index.read_txn()?;
|
||||||
let mut ctx = SearchContext::new(&index, &rtxn);
|
let mut ctx = SearchContext::new(&index, &rtxn)?;
|
||||||
// panics with `attempt to add with overflow` before <https://github.com/meilisearch/meilisearch/issues/3785>
|
// panics with `attempt to add with overflow` before <https://github.com/meilisearch/meilisearch/issues/3785>
|
||||||
let ExtractedTokens { query_terms, .. } =
|
let ExtractedTokens { query_terms, .. } =
|
||||||
located_query_terms_from_tokens(&mut ctx, tokens, None)?;
|
located_query_terms_from_tokens(&mut ctx, tokens, None)?;
|
||||||
|
@ -7,12 +7,12 @@ use crate::search::new::interner::{DedupInterner, Interned};
|
|||||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
|
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
|
||||||
use crate::search::new::SearchContext;
|
use crate::search::new::SearchContext;
|
||||||
use crate::Result;
|
use crate::{FieldId, InternalError, Result};
|
||||||
|
|
||||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||||
pub struct FidCondition {
|
pub struct FidCondition {
|
||||||
term: LocatedQueryTermSubset,
|
term: LocatedQueryTermSubset,
|
||||||
fid: u16,
|
fid: Option<FieldId>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum FidGraph {}
|
pub enum FidGraph {}
|
||||||
@ -26,13 +26,15 @@ impl RankingRuleGraphTrait for FidGraph {
|
|||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<ComputedCondition> {
|
) -> Result<ComputedCondition> {
|
||||||
let FidCondition { term, .. } = condition;
|
let FidCondition { term, .. } = condition;
|
||||||
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
|
|
||||||
let mut docids = compute_query_term_subset_docids_within_field_id(
|
let docids = if let Some(fid) = condition.fid {
|
||||||
ctx,
|
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
|
||||||
&term.term_subset,
|
let docids =
|
||||||
condition.fid,
|
compute_query_term_subset_docids_within_field_id(ctx, &term.term_subset, fid)?;
|
||||||
)?;
|
docids & universe
|
||||||
docids &= universe;
|
} else {
|
||||||
|
RoaringBitmap::new()
|
||||||
|
};
|
||||||
|
|
||||||
Ok(ComputedCondition {
|
Ok(ComputedCondition {
|
||||||
docids,
|
docids,
|
||||||
@ -68,34 +70,29 @@ impl RankingRuleGraphTrait for FidGraph {
|
|||||||
all_fields.extend(fields);
|
all_fields.extend(fields);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let weights_map = ctx.index.fieldids_weights_map(ctx.txn)?;
|
||||||
|
|
||||||
let mut edges = vec![];
|
let mut edges = vec![];
|
||||||
for fid in all_fields.iter().copied() {
|
for fid in all_fields.iter().copied() {
|
||||||
|
let weight = weights_map
|
||||||
|
.weight(fid)
|
||||||
|
.ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
|
||||||
edges.push((
|
edges.push((
|
||||||
fid as u32 * term.term_ids.len() as u32,
|
weight as u32 * term.term_ids.len() as u32,
|
||||||
conditions_interner.insert(FidCondition { term: term.clone(), fid }),
|
conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
// always lookup the max_fid if we don't already and add an artificial condition for max scoring
|
// always lookup the max_fid if we don't already and add an artificial condition for max scoring
|
||||||
let max_fid: Option<u16> = {
|
let max_weight: Option<u16> = weights_map.max_weight();
|
||||||
if let Some(max_fid) = ctx
|
|
||||||
.index
|
|
||||||
.searchable_fields_ids(ctx.txn)?
|
|
||||||
.map(|field_ids| field_ids.into_iter().max())
|
|
||||||
{
|
|
||||||
max_fid
|
|
||||||
} else {
|
|
||||||
ctx.index.fields_ids_map(ctx.txn)?.ids().max()
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Some(max_fid) = max_fid {
|
if let Some(max_weight) = max_weight {
|
||||||
if !all_fields.contains(&max_fid) {
|
if !all_fields.contains(&max_weight) {
|
||||||
edges.push((
|
edges.push((
|
||||||
max_fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
|
max_weight as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
|
||||||
conditions_interner.insert(FidCondition {
|
conditions_interner.insert(FidCondition {
|
||||||
term: term.clone(), // TODO remove this ugly clone
|
term: term.clone(), // TODO remove this ugly clone
|
||||||
fid: max_fid,
|
fid: None,
|
||||||
}),
|
}),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use crate::index::tests::TempIndex;
|
use crate::index::tests::TempIndex;
|
||||||
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
|
use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy};
|
||||||
|
|
||||||
fn create_index() -> TempIndex {
|
fn create_index() -> TempIndex {
|
||||||
let index = TempIndex::new();
|
let index = TempIndex::new();
|
||||||
@ -131,6 +131,19 @@ fn test_attribute_fid_simple() {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_attribute_fid_ngrams() {
|
fn test_attribute_fid_ngrams() {
|
||||||
let index = create_index();
|
let index = create_index();
|
||||||
|
db_snap!(index, fields_ids_map, @r###"
|
||||||
|
0 id |
|
||||||
|
1 title |
|
||||||
|
2 description |
|
||||||
|
3 plot |
|
||||||
|
"###);
|
||||||
|
db_snap!(index, searchable_fields, @r###"["title", "description", "plot"]"###);
|
||||||
|
db_snap!(index, fieldids_weights_map, @r###"
|
||||||
|
fid weight
|
||||||
|
1 0 |
|
||||||
|
2 1 |
|
||||||
|
3 2 |
|
||||||
|
"###);
|
||||||
|
|
||||||
let txn = index.read_txn().unwrap();
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
@ -0,0 +1,244 @@
|
|||||||
|
---
|
||||||
|
source: milli/src/search/new/tests/attribute_fid.rs
|
||||||
|
expression: "format!(\"{document_ids_scores:#?}\")"
|
||||||
|
---
|
||||||
|
[
|
||||||
|
(
|
||||||
|
2,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 19,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 91,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
6,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 15,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 81,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
5,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 14,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 79,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
4,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 13,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 77,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
3,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 12,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 83,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
9,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 11,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 75,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
8,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 10,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 79,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
7,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 10,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 73,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
11,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 7,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 77,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
10,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 6,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 81,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
13,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 6,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 81,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
12,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 6,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 78,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
14,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 5,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 75,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
0,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 1,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 91,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
]
|
@ -308,6 +308,25 @@ pub fn snap_fields_ids_map(index: &Index) -> String {
|
|||||||
}
|
}
|
||||||
snap
|
snap
|
||||||
}
|
}
|
||||||
|
pub fn snap_fieldids_weights_map(index: &Index) -> String {
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let weights_map = index.fieldids_weights_map(&rtxn).unwrap();
|
||||||
|
|
||||||
|
let mut snap = String::new();
|
||||||
|
writeln!(&mut snap, "fid weight").unwrap();
|
||||||
|
let mut field_ids: Vec<_> = weights_map.ids().collect();
|
||||||
|
field_ids.sort();
|
||||||
|
for field_id in field_ids {
|
||||||
|
let weight = weights_map.weight(field_id).unwrap();
|
||||||
|
writeln!(&mut snap, "{field_id:<3} {weight:<3} |").unwrap();
|
||||||
|
}
|
||||||
|
snap
|
||||||
|
}
|
||||||
|
pub fn snap_searchable_fields(index: &Index) -> String {
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let searchable_fields = index.searchable_fields(&rtxn).unwrap();
|
||||||
|
format!("{searchable_fields:?}")
|
||||||
|
}
|
||||||
pub fn snap_geo_faceted_documents_ids(index: &Index) -> String {
|
pub fn snap_geo_faceted_documents_ids(index: &Index) -> String {
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
let geo_faceted_documents_ids = index.geo_faceted_documents_ids(&rtxn).unwrap();
|
let geo_faceted_documents_ids = index.geo_faceted_documents_ids(&rtxn).unwrap();
|
||||||
@ -469,6 +488,12 @@ macro_rules! full_snap_of_db {
|
|||||||
($index:ident, fields_ids_map) => {{
|
($index:ident, fields_ids_map) => {{
|
||||||
$crate::snapshot_tests::snap_fields_ids_map(&$index)
|
$crate::snapshot_tests::snap_fields_ids_map(&$index)
|
||||||
}};
|
}};
|
||||||
|
($index:ident, fieldids_weights_map) => {{
|
||||||
|
$crate::snapshot_tests::snap_fieldids_weights_map(&$index)
|
||||||
|
}};
|
||||||
|
($index:ident, searchable_fields) => {{
|
||||||
|
$crate::snapshot_tests::snap_searchable_fields(&$index)
|
||||||
|
}};
|
||||||
($index:ident, geo_faceted_documents_ids) => {{
|
($index:ident, geo_faceted_documents_ids) => {{
|
||||||
$crate::snapshot_tests::snap_geo_faceted_documents_ids(&$index)
|
$crate::snapshot_tests::snap_geo_faceted_documents_ids(&$index)
|
||||||
}};
|
}};
|
||||||
|
@ -186,7 +186,7 @@ fn searchable_fields_changed(
|
|||||||
) -> bool {
|
) -> bool {
|
||||||
let searchable_fields = &settings_diff.new.searchable_fields_ids;
|
let searchable_fields = &settings_diff.new.searchable_fields_ids;
|
||||||
for (field_id, field_bytes) in obkv.iter() {
|
for (field_id, field_bytes) in obkv.iter() {
|
||||||
if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
|
if searchable_fields.contains(&field_id) {
|
||||||
let del_add = KvReaderDelAdd::new(field_bytes);
|
let del_add = KvReaderDelAdd::new(field_bytes);
|
||||||
match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
|
match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
|
||||||
// if both fields are None, check the next field.
|
// if both fields are None, check the next field.
|
||||||
@ -298,7 +298,7 @@ fn lang_safe_tokens_from_document<'a>(
|
|||||||
/// Extract words mapped with their positions of a document.
|
/// Extract words mapped with their positions of a document.
|
||||||
fn tokens_from_document<'a>(
|
fn tokens_from_document<'a>(
|
||||||
obkv: &KvReader<FieldId>,
|
obkv: &KvReader<FieldId>,
|
||||||
searchable_fields: &Option<Vec<FieldId>>,
|
searchable_fields: &[FieldId],
|
||||||
tokenizer: &Tokenizer,
|
tokenizer: &Tokenizer,
|
||||||
max_positions_per_attributes: u32,
|
max_positions_per_attributes: u32,
|
||||||
del_add: DelAdd,
|
del_add: DelAdd,
|
||||||
@ -309,7 +309,7 @@ fn tokens_from_document<'a>(
|
|||||||
let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
|
let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
|
||||||
for (field_id, field_bytes) in obkv.iter() {
|
for (field_id, field_bytes) in obkv.iter() {
|
||||||
// if field is searchable.
|
// if field is searchable.
|
||||||
if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
|
if searchable_fields.as_ref().contains(&field_id) {
|
||||||
// extract deletion or addition only.
|
// extract deletion or addition only.
|
||||||
if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) {
|
if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) {
|
||||||
// parse json.
|
// parse json.
|
||||||
|
@ -3260,6 +3260,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
#[cfg(feature = "all-tokenizations")]
|
||||||
fn stored_detected_script_and_language_should_not_return_deleted_documents() {
|
fn stored_detected_script_and_language_should_not_return_deleted_documents() {
|
||||||
use charabia::{Language, Script};
|
use charabia::{Language, Script};
|
||||||
let index = TempIndex::new();
|
let index = TempIndex::new();
|
||||||
|
@ -461,50 +461,39 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
Ok(true)
|
Ok(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Updates the index's searchable attributes. This causes the field map to be recomputed to
|
/// Updates the index's searchable attributes.
|
||||||
/// reflect the order of the searchable attributes.
|
|
||||||
fn update_searchable(&mut self) -> Result<bool> {
|
fn update_searchable(&mut self) -> Result<bool> {
|
||||||
match self.searchable_fields {
|
match self.searchable_fields {
|
||||||
Setting::Set(ref fields) => {
|
Setting::Set(ref fields) => {
|
||||||
// Check to see if the searchable fields changed before doing anything else
|
// Check to see if the searchable fields changed before doing anything else
|
||||||
let old_fields = self.index.searchable_fields(self.wtxn)?;
|
let old_fields = self.index.searchable_fields(self.wtxn)?;
|
||||||
let did_change = match old_fields {
|
let did_change = {
|
||||||
// If old_fields is Some, let's check to see if the fields actually changed
|
let new_fields = fields.iter().map(String::as_str).collect::<Vec<_>>();
|
||||||
Some(old_fields) => {
|
new_fields != old_fields
|
||||||
let new_fields = fields.iter().map(String::as_str).collect::<Vec<_>>();
|
|
||||||
new_fields != old_fields
|
|
||||||
}
|
|
||||||
// If old_fields is None, the fields have changed (because they are being set)
|
|
||||||
None => true,
|
|
||||||
};
|
};
|
||||||
if !did_change {
|
if !did_change {
|
||||||
return Ok(false);
|
return Ok(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// every time the searchable attributes are updated, we need to update the
|
// Since we're updating the settings we can only add new fields at the end of the field id map
|
||||||
// ids for any settings that uses the facets. (distinct_fields, filterable_fields).
|
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||||
let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
|
||||||
|
|
||||||
let mut new_fields_ids_map = FieldsIdsMap::new();
|
|
||||||
// fields are deduplicated, only the first occurrence is taken into account
|
// fields are deduplicated, only the first occurrence is taken into account
|
||||||
let names = fields.iter().unique().map(String::as_str).collect::<Vec<_>>();
|
let names = fields.iter().unique().map(String::as_str).collect::<Vec<_>>();
|
||||||
|
|
||||||
// Add all the searchable attributes to the field map, and then add the
|
// Add all the searchable attributes to the field map, and then add the
|
||||||
// remaining fields from the old field map to the new one
|
// remaining fields from the old field map to the new one
|
||||||
for name in names.iter() {
|
for name in names.iter() {
|
||||||
new_fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
|
// The fields ids map won't change the field id of already present elements thus only the
|
||||||
}
|
// new fields will be inserted.
|
||||||
|
fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
|
||||||
for (_, name) in old_fields_ids_map.iter() {
|
|
||||||
new_fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
self.index.put_all_searchable_fields_from_fields_ids_map(
|
self.index.put_all_searchable_fields_from_fields_ids_map(
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
&names,
|
&names,
|
||||||
&new_fields_ids_map,
|
&fields_ids_map,
|
||||||
)?;
|
)?;
|
||||||
self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?;
|
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||||
Ok(true)
|
Ok(true)
|
||||||
}
|
}
|
||||||
Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?),
|
Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?),
|
||||||
@ -1172,7 +1161,7 @@ pub(crate) struct InnerIndexSettings {
|
|||||||
pub user_defined_faceted_fields: HashSet<String>,
|
pub user_defined_faceted_fields: HashSet<String>,
|
||||||
pub user_defined_searchable_fields: Option<Vec<String>>,
|
pub user_defined_searchable_fields: Option<Vec<String>>,
|
||||||
pub faceted_fields_ids: HashSet<FieldId>,
|
pub faceted_fields_ids: HashSet<FieldId>,
|
||||||
pub searchable_fields_ids: Option<Vec<FieldId>>,
|
pub searchable_fields_ids: Vec<FieldId>,
|
||||||
pub exact_attributes: HashSet<FieldId>,
|
pub exact_attributes: HashSet<FieldId>,
|
||||||
pub proximity_precision: ProximityPrecision,
|
pub proximity_precision: ProximityPrecision,
|
||||||
pub embedding_configs: EmbeddingConfigs,
|
pub embedding_configs: EmbeddingConfigs,
|
||||||
@ -1233,18 +1222,21 @@ impl InnerIndexSettings {
|
|||||||
|
|
||||||
// find and insert the new field ids
|
// find and insert the new field ids
|
||||||
pub fn recompute_searchables(&mut self, wtxn: &mut heed::RwTxn, index: &Index) -> Result<()> {
|
pub fn recompute_searchables(&mut self, wtxn: &mut heed::RwTxn, index: &Index) -> Result<()> {
|
||||||
|
let searchable_fields = self
|
||||||
|
.user_defined_searchable_fields
|
||||||
|
.as_ref()
|
||||||
|
.map(|searchable| searchable.iter().map(|s| s.as_str()).collect::<Vec<_>>());
|
||||||
|
|
||||||
// in case new fields were introduced we're going to recreate the searchable fields.
|
// in case new fields were introduced we're going to recreate the searchable fields.
|
||||||
if let Some(searchable_fields) = self.user_defined_searchable_fields.as_ref() {
|
if let Some(searchable_fields) = searchable_fields {
|
||||||
let searchable_fields =
|
|
||||||
searchable_fields.iter().map(String::as_ref).collect::<Vec<_>>();
|
|
||||||
index.put_all_searchable_fields_from_fields_ids_map(
|
index.put_all_searchable_fields_from_fields_ids_map(
|
||||||
wtxn,
|
wtxn,
|
||||||
&searchable_fields,
|
&searchable_fields,
|
||||||
&self.fields_ids_map,
|
&self.fields_ids_map,
|
||||||
)?;
|
)?;
|
||||||
let searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
|
|
||||||
self.searchable_fields_ids = searchable_fields_ids;
|
|
||||||
}
|
}
|
||||||
|
let searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
|
||||||
|
self.searchable_fields_ids = searchable_fields_ids;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -1517,12 +1509,13 @@ mod tests {
|
|||||||
use big_s::S;
|
use big_s::S;
|
||||||
use heed::types::Bytes;
|
use heed::types::Bytes;
|
||||||
use maplit::{btreemap, btreeset, hashset};
|
use maplit::{btreemap, btreeset, hashset};
|
||||||
|
use meili_snap::snapshot;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::error::Error;
|
use crate::error::Error;
|
||||||
use crate::index::tests::TempIndex;
|
use crate::index::tests::TempIndex;
|
||||||
use crate::update::ClearDocuments;
|
use crate::update::ClearDocuments;
|
||||||
use crate::{Criterion, Filter, SearchResult};
|
use crate::{db_snap, Criterion, Filter, SearchResult};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn set_and_reset_searchable_fields() {
|
fn set_and_reset_searchable_fields() {
|
||||||
@ -1551,6 +1544,17 @@ mod tests {
|
|||||||
|
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
db_snap!(index, fields_ids_map, @r###"
|
||||||
|
0 id |
|
||||||
|
1 name |
|
||||||
|
2 age |
|
||||||
|
"###);
|
||||||
|
db_snap!(index, searchable_fields, @r###"["name"]"###);
|
||||||
|
db_snap!(index, fieldids_weights_map, @r###"
|
||||||
|
fid weight
|
||||||
|
1 0 |
|
||||||
|
"###);
|
||||||
|
|
||||||
// Check that the searchable field is correctly set to "name" only.
|
// Check that the searchable field is correctly set to "name" only.
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
// When we search for something that is not in
|
// When we search for something that is not in
|
||||||
@ -1562,8 +1566,9 @@ mod tests {
|
|||||||
// we must find the appropriate document.
|
// we must find the appropriate document.
|
||||||
let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap();
|
let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap();
|
||||||
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
|
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
|
||||||
|
let fid_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
assert_eq!(documents.len(), 1);
|
assert_eq!(documents.len(), 1);
|
||||||
assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..]));
|
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
|
||||||
drop(rtxn);
|
drop(rtxn);
|
||||||
|
|
||||||
// We change the searchable fields to be the "name" field only.
|
// We change the searchable fields to be the "name" field only.
|
||||||
@ -1573,14 +1578,31 @@ mod tests {
|
|||||||
})
|
})
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
db_snap!(index, fields_ids_map, @r###"
|
||||||
|
0 id |
|
||||||
|
1 name |
|
||||||
|
2 age |
|
||||||
|
"###);
|
||||||
|
db_snap!(index, searchable_fields, @r###"["id", "name", "age"]"###);
|
||||||
|
db_snap!(index, fieldids_weights_map, @r###"
|
||||||
|
fid weight
|
||||||
|
0 0 |
|
||||||
|
1 0 |
|
||||||
|
2 0 |
|
||||||
|
"###);
|
||||||
|
|
||||||
// Check that the searchable field have been reset and documents are found now.
|
// Check that the searchable field have been reset and documents are found now.
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let fid_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
|
let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap();
|
||||||
|
snapshot!(format!("{user_defined_searchable_fields:?}"), @"None");
|
||||||
|
// the searchable fields should contain all the fields
|
||||||
let searchable_fields = index.searchable_fields(&rtxn).unwrap();
|
let searchable_fields = index.searchable_fields(&rtxn).unwrap();
|
||||||
assert_eq!(searchable_fields, None);
|
snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###);
|
||||||
let result = index.search(&rtxn).query("23").execute().unwrap();
|
let result = index.search(&rtxn).query("23").execute().unwrap();
|
||||||
assert_eq!(result.documents_ids.len(), 1);
|
assert_eq!(result.documents_ids.len(), 1);
|
||||||
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
|
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
|
||||||
assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..]));
|
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user