mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-29 16:24:26 +01:00
Merge #3834
3834: Define searchable fields at runtime r=Kerollmops a=ManyTheFish ## Summary This feature allows the end-user to search in one or multiple attributes using the search parameter `attributesToSearchOn`: ```json { "q": "Captain Marvel", "attributesToSearchOn": ["title"] } ``` This feature act like a filter, forcing Meilisearch to only return the documents containing the requested words in the attributes-to-search-on. Note that, with the matching strategy `last`, Meilisearch will only ensure that the first word is in the attributes-to-search-on, but, the retrieved documents will be ordered taking into account the word contained in the attributes-to-search-on. ## Trying the prototype A dedicated docker image has been released for this feature: #### last prototype version: ```bash docker pull getmeili/meilisearch:prototype-define-searchable-fields-at-search-time-1 ``` #### others prototype versions: ```bash docker pull getmeili/meilisearch:prototype-define-searchable-fields-at-search-time-0 ``` ## Technical Detail The attributes-to-search-on list is given to the search context, then, the search context uses the `fid_word_docids`database using only the allowed field ids instead of the global `word_docids` database. This is the same for the prefix databases. The database cache is updated with the merged values, meaning that the union of the field-id-database values is only made if the requested key is missing from the cache. ### Relevancy limits Almost all ranking rules behave as expected when ordering the documents. Only `proximity` could miss-order documents if all the searched words are in the restricted attribute but a better proximity is found in an ignored attribute in a document that should be ranked lower. I put below a failing test showing it: ```rust #[actix_rt::test] async fn proximity_ranking_rule_order() { let server = Server::new().await; let index = index_with_documents( &server, &json!([ { "title": "Captain super mega cool. A Marvel story", // Perfect distance between words in an ignored attribute "desc": "Captain Marvel", "id": "1", }, { "title": "Captain America from Marvel", "desc": "a Shazam ersatz", "id": "2", }]), ) .await; // Document 2 should appear before document 1. index .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( response["hits"], json!([ {"id": "2"}, {"id": "1"}, ]) ); }) .await; } ``` Fixing this would force us to create a `fid_word_pair_proximity_docids` and a `fid_word_prefix_pair_proximity_docids` databases which may multiply the keys of `word_pair_proximity_docids` and `word_prefix_pair_proximity_docids` by the number of attributes in the searchable_attributes list. If we think we should fix this test, I'll suggest doing it in another PR. ## Related Fixes #3772 Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
commit
d4f10800f2
@ -226,6 +226,7 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
|
|||||||
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
|
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
|
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
|
InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
|
||||||
@ -336,6 +337,9 @@ impl ErrorCode for milli::Error {
|
|||||||
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
|
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
|
||||||
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
|
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
|
||||||
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
|
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
|
||||||
|
UserError::InvalidSearchableAttribute { .. } => {
|
||||||
|
Code::InvalidAttributesToSearchOn
|
||||||
|
}
|
||||||
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
|
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
|
||||||
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
|
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
|
||||||
UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
|
UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
|
||||||
|
@ -72,6 +72,8 @@ pub struct SearchQueryGet {
|
|||||||
crop_marker: String,
|
crop_marker: String,
|
||||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchMatchingStrategy>)]
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchMatchingStrategy>)]
|
||||||
matching_strategy: MatchingStrategy,
|
matching_strategy: MatchingStrategy,
|
||||||
|
#[deserr(default, error = DeserrQueryParamError<InvalidAttributesToSearchOn>)]
|
||||||
|
pub attributes_to_search_on: Option<CS<String>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<SearchQueryGet> for SearchQuery {
|
impl From<SearchQueryGet> for SearchQuery {
|
||||||
@ -105,6 +107,7 @@ impl From<SearchQueryGet> for SearchQuery {
|
|||||||
highlight_post_tag: other.highlight_post_tag,
|
highlight_post_tag: other.highlight_post_tag,
|
||||||
crop_marker: other.crop_marker,
|
crop_marker: other.crop_marker,
|
||||||
matching_strategy: other.matching_strategy,
|
matching_strategy: other.matching_strategy,
|
||||||
|
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -79,6 +79,8 @@ pub struct SearchQuery {
|
|||||||
pub crop_marker: String,
|
pub crop_marker: String,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
|
||||||
pub matching_strategy: MatchingStrategy,
|
pub matching_strategy: MatchingStrategy,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidAttributesToSearchOn>, default)]
|
||||||
|
pub attributes_to_search_on: Option<Vec<String>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SearchQuery {
|
impl SearchQuery {
|
||||||
@ -136,6 +138,8 @@ pub struct SearchQueryWithIndex {
|
|||||||
pub crop_marker: String,
|
pub crop_marker: String,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
|
||||||
pub matching_strategy: MatchingStrategy,
|
pub matching_strategy: MatchingStrategy,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidAttributesToSearchOn>, default)]
|
||||||
|
pub attributes_to_search_on: Option<Vec<String>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SearchQueryWithIndex {
|
impl SearchQueryWithIndex {
|
||||||
@ -162,6 +166,7 @@ impl SearchQueryWithIndex {
|
|||||||
highlight_post_tag,
|
highlight_post_tag,
|
||||||
crop_marker,
|
crop_marker,
|
||||||
matching_strategy,
|
matching_strategy,
|
||||||
|
attributes_to_search_on,
|
||||||
} = self;
|
} = self;
|
||||||
(
|
(
|
||||||
index_uid,
|
index_uid,
|
||||||
@ -186,6 +191,7 @@ impl SearchQueryWithIndex {
|
|||||||
highlight_post_tag,
|
highlight_post_tag,
|
||||||
crop_marker,
|
crop_marker,
|
||||||
matching_strategy,
|
matching_strategy,
|
||||||
|
attributes_to_search_on,
|
||||||
// do not use ..Default::default() here,
|
// do not use ..Default::default() here,
|
||||||
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
|
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
|
||||||
},
|
},
|
||||||
@ -314,6 +320,10 @@ pub fn perform_search(
|
|||||||
search.query(query);
|
search.query(query);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(ref searchable) = query.attributes_to_search_on {
|
||||||
|
search.searchable_attributes(searchable);
|
||||||
|
}
|
||||||
|
|
||||||
let is_finite_pagination = query.is_finite_pagination();
|
let is_finite_pagination = query.is_finite_pagination();
|
||||||
search.terms_matching_strategy(query.matching_strategy.into());
|
search.terms_matching_strategy(query.matching_strategy.into());
|
||||||
|
|
||||||
|
@ -346,17 +346,24 @@ impl Index<'_> {
|
|||||||
query: Value,
|
query: Value,
|
||||||
test: impl Fn(Value, StatusCode) + UnwindSafe + Clone,
|
test: impl Fn(Value, StatusCode) + UnwindSafe + Clone,
|
||||||
) {
|
) {
|
||||||
let (response, code) = self.search_post(query.clone()).await;
|
let post = self.search_post(query.clone()).await;
|
||||||
let t = test.clone();
|
|
||||||
if let Err(e) = catch_unwind(move || t(response, code)) {
|
|
||||||
eprintln!("Error with post search");
|
|
||||||
resume_unwind(e);
|
|
||||||
}
|
|
||||||
let query = yaup::to_string(&query).unwrap();
|
let query = yaup::to_string(&query).unwrap();
|
||||||
let (response, code) = self.search_get(&query).await;
|
let get = self.search_get(&query).await;
|
||||||
if let Err(e) = catch_unwind(move || test(response, code)) {
|
|
||||||
eprintln!("Error with get search");
|
insta::allow_duplicates! {
|
||||||
resume_unwind(e);
|
let (response, code) = post;
|
||||||
|
let t = test.clone();
|
||||||
|
if let Err(e) = catch_unwind(move || t(response, code)) {
|
||||||
|
eprintln!("Error with post search");
|
||||||
|
resume_unwind(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
let (response, code) = get;
|
||||||
|
if let Err(e) = catch_unwind(move || test(response, code)) {
|
||||||
|
eprintln!("Error with get search");
|
||||||
|
resume_unwind(e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -963,3 +963,29 @@ async fn sort_unset_ranking_rule() {
|
|||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn search_on_unknown_field() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
index.add_documents(documents, None).await;
|
||||||
|
index.wait_task(0).await;
|
||||||
|
|
||||||
|
index
|
||||||
|
.search(
|
||||||
|
json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}),
|
||||||
|
|response, code| {
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
|
||||||
|
"code": "invalid_attributes_to_search_on",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_attributes_to_search_on"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
@ -5,6 +5,7 @@ mod errors;
|
|||||||
mod formatted;
|
mod formatted;
|
||||||
mod multi;
|
mod multi;
|
||||||
mod pagination;
|
mod pagination;
|
||||||
|
mod restrict_searchable;
|
||||||
|
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use serde_json::{json, Value};
|
use serde_json::{json, Value};
|
||||||
|
267
meilisearch/tests/search/restrict_searchable.rs
Normal file
267
meilisearch/tests/search/restrict_searchable.rs
Normal file
@ -0,0 +1,267 @@
|
|||||||
|
use meili_snap::{json_string, snapshot};
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
use serde_json::{json, Value};
|
||||||
|
|
||||||
|
use crate::common::index::Index;
|
||||||
|
use crate::common::Server;
|
||||||
|
|
||||||
|
async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
index.add_documents(documents.clone(), None).await;
|
||||||
|
index.wait_task(0).await;
|
||||||
|
index
|
||||||
|
}
|
||||||
|
|
||||||
|
static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||||
|
json!([
|
||||||
|
{
|
||||||
|
"title": "Shazam!",
|
||||||
|
"desc": "a Captain Marvel ersatz",
|
||||||
|
"id": "1",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Captain Planet",
|
||||||
|
"desc": "He's not part of the Marvel Cinematic Universe",
|
||||||
|
"id": "2",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Captain Marvel",
|
||||||
|
"desc": "a Shazam ersatz",
|
||||||
|
"id": "3",
|
||||||
|
}])
|
||||||
|
});
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn simple_search_on_title() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
|
||||||
|
|
||||||
|
// simple search should return 2 documents (ids: 2 and 3).
|
||||||
|
index
|
||||||
|
.search(
|
||||||
|
json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"]}),
|
||||||
|
|response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(response["hits"].as_array().unwrap().len(), @"2");
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn simple_prefix_search_on_title() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
|
||||||
|
|
||||||
|
// simple search should return 2 documents (ids: 2 and 3).
|
||||||
|
index
|
||||||
|
.search(json!({"q": "Captain Mar", "attributesToSearchOn": ["title"]}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(response["hits"].as_array().unwrap().len(), @"2");
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn simple_search_on_title_matching_strategy_all() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
|
||||||
|
// simple search matching strategy all should only return 1 document (ids: 2).
|
||||||
|
index
|
||||||
|
.search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "matchingStrategy": "all"}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(response["hits"].as_array().unwrap().len(), @"1");
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn simple_search_on_no_field() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
|
||||||
|
// simple search on no field shouldn't return any document.
|
||||||
|
index
|
||||||
|
.search(json!({"q": "Captain Marvel", "attributesToSearchOn": []}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(response["hits"].as_array().unwrap().len(), @"0");
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn word_ranking_rule_order() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
|
||||||
|
|
||||||
|
// Document 3 should appear before document 2.
|
||||||
|
index
|
||||||
|
.search(
|
||||||
|
json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}),
|
||||||
|
|response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]),
|
||||||
|
@r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": "3"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "2"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###
|
||||||
|
);
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn word_ranking_rule_order_exact_words() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
|
||||||
|
index.update_settings_typo_tolerance(json!({"disableOnWords": ["Captain", "Marvel"]})).await;
|
||||||
|
index.wait_task(1).await;
|
||||||
|
|
||||||
|
// simple search should return 2 documents (ids: 2 and 3).
|
||||||
|
index
|
||||||
|
.search(
|
||||||
|
json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}),
|
||||||
|
|response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]),
|
||||||
|
@r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": "3"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "2"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###
|
||||||
|
);
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn typo_ranking_rule_order() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = index_with_documents(
|
||||||
|
&server,
|
||||||
|
&json!([
|
||||||
|
{
|
||||||
|
"title": "Capitain Marivel",
|
||||||
|
"desc": "Captain Marvel",
|
||||||
|
"id": "1",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Captain Marivel",
|
||||||
|
"desc": "a Shazam ersatz",
|
||||||
|
"id": "2",
|
||||||
|
}]),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// Document 2 should appear before document 1.
|
||||||
|
index
|
||||||
|
.search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]),
|
||||||
|
@r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": "2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "1"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###
|
||||||
|
);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn attributes_ranking_rule_order() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = index_with_documents(
|
||||||
|
&server,
|
||||||
|
&json!([
|
||||||
|
{
|
||||||
|
"title": "Captain Marvel",
|
||||||
|
"desc": "a Shazam ersatz",
|
||||||
|
"footer": "The story of Captain Marvel",
|
||||||
|
"id": "1",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "The Avengers",
|
||||||
|
"desc": "Captain Marvel is far from the earth",
|
||||||
|
"footer": "A super hero team",
|
||||||
|
"id": "2",
|
||||||
|
}]),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// Document 2 should appear before document 1.
|
||||||
|
index
|
||||||
|
.search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["desc", "footer"], "attributesToRetrieve": ["id"]}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]),
|
||||||
|
@r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": "2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "1"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###
|
||||||
|
);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn exactness_ranking_rule_order() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = index_with_documents(
|
||||||
|
&server,
|
||||||
|
&json!([
|
||||||
|
{
|
||||||
|
"title": "Captain Marvel",
|
||||||
|
"desc": "Captain Marivel",
|
||||||
|
"id": "1",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Captain Marvel",
|
||||||
|
"desc": "CaptainMarvel",
|
||||||
|
"id": "2",
|
||||||
|
}]),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// Document 2 should appear before document 1.
|
||||||
|
index
|
||||||
|
.search(json!({"q": "Captain Marvel", "attributesToRetrieve": ["id"], "attributesToSearchOn": ["desc"]}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]),
|
||||||
|
@r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": "2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "1"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###
|
||||||
|
);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
@ -128,6 +128,16 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
|||||||
}
|
}
|
||||||
)]
|
)]
|
||||||
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
|
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
|
||||||
|
#[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.",
|
||||||
|
.field,
|
||||||
|
.valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
|
||||||
|
.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
|
||||||
|
)]
|
||||||
|
InvalidSearchableAttribute {
|
||||||
|
field: String,
|
||||||
|
valid_fields: BTreeSet<String>,
|
||||||
|
hidden_fields: bool,
|
||||||
|
},
|
||||||
#[error("{}", HeedError::BadOpenOptions)]
|
#[error("{}", HeedError::BadOpenOptions)]
|
||||||
InvalidLmdbOpenOptions,
|
InvalidLmdbOpenOptions,
|
||||||
#[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
|
#[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
|
||||||
|
@ -23,3 +23,9 @@ pub use self::roaring_bitmap_length::{
|
|||||||
pub use self::script_language_codec::ScriptLanguageCodec;
|
pub use self::script_language_codec::ScriptLanguageCodec;
|
||||||
pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec};
|
pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec};
|
||||||
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
|
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
|
||||||
|
|
||||||
|
pub trait BytesDecodeOwned {
|
||||||
|
type DItem;
|
||||||
|
|
||||||
|
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem>;
|
||||||
|
}
|
||||||
|
@ -2,8 +2,11 @@ use std::borrow::Cow;
|
|||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
use std::mem::size_of;
|
use std::mem::size_of;
|
||||||
|
|
||||||
|
use heed::BytesDecode;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::heed_codec::BytesDecodeOwned;
|
||||||
|
|
||||||
pub struct BoRoaringBitmapCodec;
|
pub struct BoRoaringBitmapCodec;
|
||||||
|
|
||||||
impl BoRoaringBitmapCodec {
|
impl BoRoaringBitmapCodec {
|
||||||
@ -13,7 +16,7 @@ impl BoRoaringBitmapCodec {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl heed::BytesDecode<'_> for BoRoaringBitmapCodec {
|
impl BytesDecode<'_> for BoRoaringBitmapCodec {
|
||||||
type DItem = RoaringBitmap;
|
type DItem = RoaringBitmap;
|
||||||
|
|
||||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||||
@ -28,6 +31,14 @@ impl heed::BytesDecode<'_> for BoRoaringBitmapCodec {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl BytesDecodeOwned for BoRoaringBitmapCodec {
|
||||||
|
type DItem = RoaringBitmap;
|
||||||
|
|
||||||
|
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||||
|
Self::bytes_decode(bytes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
|
impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
|
||||||
type EItem = RoaringBitmap;
|
type EItem = RoaringBitmap;
|
||||||
|
|
||||||
|
@ -5,6 +5,8 @@ use std::mem::size_of;
|
|||||||
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::heed_codec::BytesDecodeOwned;
|
||||||
|
|
||||||
/// This is the limit where using a byteorder became less size efficient
|
/// This is the limit where using a byteorder became less size efficient
|
||||||
/// than using a direct roaring encoding, it is also the point where we are able
|
/// than using a direct roaring encoding, it is also the point where we are able
|
||||||
/// to determine the encoding used only by using the array of bytes length.
|
/// to determine the encoding used only by using the array of bytes length.
|
||||||
@ -103,6 +105,14 @@ impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl BytesDecodeOwned for CboRoaringBitmapCodec {
|
||||||
|
type DItem = RoaringBitmap;
|
||||||
|
|
||||||
|
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||||
|
Self::deserialize_from(bytes).ok()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
|
impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
|
||||||
type EItem = RoaringBitmap;
|
type EItem = RoaringBitmap;
|
||||||
|
|
||||||
|
@ -2,6 +2,8 @@ use std::borrow::Cow;
|
|||||||
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::heed_codec::BytesDecodeOwned;
|
||||||
|
|
||||||
pub struct RoaringBitmapCodec;
|
pub struct RoaringBitmapCodec;
|
||||||
|
|
||||||
impl heed::BytesDecode<'_> for RoaringBitmapCodec {
|
impl heed::BytesDecode<'_> for RoaringBitmapCodec {
|
||||||
@ -12,6 +14,14 @@ impl heed::BytesDecode<'_> for RoaringBitmapCodec {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl BytesDecodeOwned for RoaringBitmapCodec {
|
||||||
|
type DItem = RoaringBitmap;
|
||||||
|
|
||||||
|
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||||
|
RoaringBitmap::deserialize_from(bytes).ok()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl heed::BytesEncode<'_> for RoaringBitmapCodec {
|
impl heed::BytesEncode<'_> for RoaringBitmapCodec {
|
||||||
type EItem = RoaringBitmap;
|
type EItem = RoaringBitmap;
|
||||||
|
|
||||||
|
@ -1,11 +1,23 @@
|
|||||||
use std::mem;
|
use std::mem;
|
||||||
|
|
||||||
|
use heed::BytesDecode;
|
||||||
|
|
||||||
|
use crate::heed_codec::BytesDecodeOwned;
|
||||||
|
|
||||||
pub struct BoRoaringBitmapLenCodec;
|
pub struct BoRoaringBitmapLenCodec;
|
||||||
|
|
||||||
impl heed::BytesDecode<'_> for BoRoaringBitmapLenCodec {
|
impl BytesDecode<'_> for BoRoaringBitmapLenCodec {
|
||||||
type DItem = u64;
|
type DItem = u64;
|
||||||
|
|
||||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||||
Some((bytes.len() / mem::size_of::<u32>()) as u64)
|
Some((bytes.len() / mem::size_of::<u32>()) as u64)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl BytesDecodeOwned for BoRoaringBitmapLenCodec {
|
||||||
|
type DItem = u64;
|
||||||
|
|
||||||
|
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||||
|
Self::bytes_decode(bytes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,11 +1,14 @@
|
|||||||
use std::mem;
|
use std::mem;
|
||||||
|
|
||||||
|
use heed::BytesDecode;
|
||||||
|
|
||||||
use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec};
|
use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec};
|
||||||
use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD;
|
use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD;
|
||||||
|
use crate::heed_codec::BytesDecodeOwned;
|
||||||
|
|
||||||
pub struct CboRoaringBitmapLenCodec;
|
pub struct CboRoaringBitmapLenCodec;
|
||||||
|
|
||||||
impl heed::BytesDecode<'_> for CboRoaringBitmapLenCodec {
|
impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
|
||||||
type DItem = u64;
|
type DItem = u64;
|
||||||
|
|
||||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||||
@ -20,3 +23,11 @@ impl heed::BytesDecode<'_> for CboRoaringBitmapLenCodec {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl BytesDecodeOwned for CboRoaringBitmapLenCodec {
|
||||||
|
type DItem = u64;
|
||||||
|
|
||||||
|
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||||
|
Self::bytes_decode(bytes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -3,6 +3,8 @@ use std::mem;
|
|||||||
|
|
||||||
use byteorder::{LittleEndian, ReadBytesExt};
|
use byteorder::{LittleEndian, ReadBytesExt};
|
||||||
|
|
||||||
|
use crate::heed_codec::BytesDecodeOwned;
|
||||||
|
|
||||||
const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
|
const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
|
||||||
const SERIAL_COOKIE: u16 = 12347;
|
const SERIAL_COOKIE: u16 = 12347;
|
||||||
|
|
||||||
@ -59,6 +61,14 @@ impl heed::BytesDecode<'_> for RoaringBitmapLenCodec {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl BytesDecodeOwned for RoaringBitmapLenCodec {
|
||||||
|
type DItem = u64;
|
||||||
|
|
||||||
|
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||||
|
RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use heed::BytesEncode;
|
use heed::BytesEncode;
|
||||||
|
@ -29,6 +29,7 @@ pub struct Search<'a> {
|
|||||||
offset: usize,
|
offset: usize,
|
||||||
limit: usize,
|
limit: usize,
|
||||||
sort_criteria: Option<Vec<AscDesc>>,
|
sort_criteria: Option<Vec<AscDesc>>,
|
||||||
|
searchable_attributes: Option<&'a [String]>,
|
||||||
geo_strategy: new::GeoSortStrategy,
|
geo_strategy: new::GeoSortStrategy,
|
||||||
terms_matching_strategy: TermsMatchingStrategy,
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
scoring_strategy: ScoringStrategy,
|
scoring_strategy: ScoringStrategy,
|
||||||
@ -47,6 +48,7 @@ impl<'a> Search<'a> {
|
|||||||
offset: 0,
|
offset: 0,
|
||||||
limit: 20,
|
limit: 20,
|
||||||
sort_criteria: None,
|
sort_criteria: None,
|
||||||
|
searchable_attributes: None,
|
||||||
geo_strategy: new::GeoSortStrategy::default(),
|
geo_strategy: new::GeoSortStrategy::default(),
|
||||||
terms_matching_strategy: TermsMatchingStrategy::default(),
|
terms_matching_strategy: TermsMatchingStrategy::default(),
|
||||||
scoring_strategy: Default::default(),
|
scoring_strategy: Default::default(),
|
||||||
@ -82,6 +84,11 @@ impl<'a> Search<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> {
|
||||||
|
self.searchable_attributes = Some(searchable);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
pub fn terms_matching_strategy(&mut self, value: TermsMatchingStrategy) -> &mut Search<'a> {
|
pub fn terms_matching_strategy(&mut self, value: TermsMatchingStrategy) -> &mut Search<'a> {
|
||||||
self.terms_matching_strategy = value;
|
self.terms_matching_strategy = value;
|
||||||
self
|
self
|
||||||
@ -117,6 +124,11 @@ impl<'a> Search<'a> {
|
|||||||
|
|
||||||
pub fn execute(&self) -> Result<SearchResult> {
|
pub fn execute(&self) -> Result<SearchResult> {
|
||||||
let mut ctx = SearchContext::new(self.index, self.rtxn);
|
let mut ctx = SearchContext::new(self.index, self.rtxn);
|
||||||
|
|
||||||
|
if let Some(searchable_attributes) = self.searchable_attributes {
|
||||||
|
ctx.searchable_attributes(searchable_attributes)?;
|
||||||
|
}
|
||||||
|
|
||||||
let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } =
|
let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } =
|
||||||
execute_search(
|
execute_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
@ -154,6 +166,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
offset,
|
offset,
|
||||||
limit,
|
limit,
|
||||||
sort_criteria,
|
sort_criteria,
|
||||||
|
searchable_attributes,
|
||||||
geo_strategy: _,
|
geo_strategy: _,
|
||||||
terms_matching_strategy,
|
terms_matching_strategy,
|
||||||
scoring_strategy,
|
scoring_strategy,
|
||||||
@ -169,6 +182,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
.field("offset", offset)
|
.field("offset", offset)
|
||||||
.field("limit", limit)
|
.field("limit", limit)
|
||||||
.field("sort_criteria", sort_criteria)
|
.field("sort_criteria", sort_criteria)
|
||||||
|
.field("searchable_attributes", searchable_attributes)
|
||||||
.field("terms_matching_strategy", terms_matching_strategy)
|
.field("terms_matching_strategy", terms_matching_strategy)
|
||||||
.field("scoring_strategy", scoring_strategy)
|
.field("scoring_strategy", scoring_strategy)
|
||||||
.field("exhaustive_number_hits", exhaustive_number_hits)
|
.field("exhaustive_number_hits", exhaustive_number_hits)
|
||||||
|
@ -4,12 +4,13 @@ use std::hash::Hash;
|
|||||||
|
|
||||||
use fxhash::FxHashMap;
|
use fxhash::FxHashMap;
|
||||||
use heed::types::ByteSlice;
|
use heed::types::ByteSlice;
|
||||||
use heed::{BytesDecode, BytesEncode, Database, RoTxn};
|
use heed::{BytesEncode, Database, RoTxn};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::interner::Interned;
|
use super::interner::Interned;
|
||||||
use super::Word;
|
use super::Word;
|
||||||
use crate::heed_codec::StrBEU16Codec;
|
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
|
||||||
|
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
|
||||||
use crate::{
|
use crate::{
|
||||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext,
|
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext,
|
||||||
};
|
};
|
||||||
@ -22,50 +23,104 @@ use crate::{
|
|||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct DatabaseCache<'ctx> {
|
pub struct DatabaseCache<'ctx> {
|
||||||
pub word_pair_proximity_docids:
|
pub word_pair_proximity_docids:
|
||||||
FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>,
|
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
|
||||||
pub word_prefix_pair_proximity_docids:
|
pub word_prefix_pair_proximity_docids:
|
||||||
FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>,
|
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
|
||||||
pub prefix_word_pair_proximity_docids:
|
pub prefix_word_pair_proximity_docids:
|
||||||
FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>,
|
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
|
||||||
pub word_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
|
pub word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
|
||||||
pub exact_word_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
|
pub exact_word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
|
||||||
pub word_prefix_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
|
pub word_prefix_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
|
||||||
pub exact_word_prefix_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
|
pub exact_word_prefix_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
|
||||||
|
|
||||||
pub words_fst: Option<fst::Set<Cow<'ctx, [u8]>>>,
|
pub words_fst: Option<fst::Set<Cow<'ctx, [u8]>>>,
|
||||||
pub word_position_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
|
pub word_position_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
|
||||||
pub word_prefix_position_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
|
pub word_prefix_position_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
|
||||||
pub word_positions: FxHashMap<Interned<String>, Vec<u16>>,
|
pub word_positions: FxHashMap<Interned<String>, Vec<u16>>,
|
||||||
pub word_prefix_positions: FxHashMap<Interned<String>, Vec<u16>>,
|
pub word_prefix_positions: FxHashMap<Interned<String>, Vec<u16>>,
|
||||||
|
|
||||||
pub word_fid_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
|
pub word_fid_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
|
||||||
pub word_prefix_fid_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
|
pub word_prefix_fid_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
|
||||||
pub word_fids: FxHashMap<Interned<String>, Vec<u16>>,
|
pub word_fids: FxHashMap<Interned<String>, Vec<u16>>,
|
||||||
pub word_prefix_fids: FxHashMap<Interned<String>, Vec<u16>>,
|
pub word_prefix_fids: FxHashMap<Interned<String>, Vec<u16>>,
|
||||||
}
|
}
|
||||||
impl<'ctx> DatabaseCache<'ctx> {
|
impl<'ctx> DatabaseCache<'ctx> {
|
||||||
fn get_value<'v, K1, KC>(
|
fn get_value<'v, K1, KC, DC>(
|
||||||
txn: &'ctx RoTxn,
|
txn: &'ctx RoTxn,
|
||||||
cache_key: K1,
|
cache_key: K1,
|
||||||
db_key: &'v KC::EItem,
|
db_key: &'v KC::EItem,
|
||||||
cache: &mut FxHashMap<K1, Option<&'ctx [u8]>>,
|
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
|
||||||
db: Database<KC, ByteSlice>,
|
db: Database<KC, ByteSlice>,
|
||||||
) -> Result<Option<&'ctx [u8]>>
|
) -> Result<Option<DC::DItem>>
|
||||||
where
|
where
|
||||||
K1: Copy + Eq + Hash,
|
K1: Copy + Eq + Hash,
|
||||||
KC: BytesEncode<'v>,
|
KC: BytesEncode<'v>,
|
||||||
|
DC: BytesDecodeOwned,
|
||||||
{
|
{
|
||||||
let bitmap_ptr = match cache.entry(cache_key) {
|
if let Entry::Vacant(entry) = cache.entry(cache_key) {
|
||||||
Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(),
|
let bitmap_ptr = db.get(txn, db_key)?.map(Cow::Borrowed);
|
||||||
Entry::Vacant(entry) => {
|
entry.insert(bitmap_ptr);
|
||||||
let bitmap_ptr = db.get(txn, db_key)?;
|
}
|
||||||
entry.insert(bitmap_ptr);
|
|
||||||
bitmap_ptr
|
match cache.get(&cache_key).unwrap() {
|
||||||
|
Some(Cow::Borrowed(bytes)) => {
|
||||||
|
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
|
||||||
}
|
}
|
||||||
};
|
Some(Cow::Owned(bytes)) => {
|
||||||
Ok(bitmap_ptr)
|
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
|
||||||
|
}
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_value_from_keys<'v, K1, KC, DC>(
|
||||||
|
txn: &'ctx RoTxn,
|
||||||
|
cache_key: K1,
|
||||||
|
db_keys: &'v [KC::EItem],
|
||||||
|
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
|
||||||
|
db: Database<KC, ByteSlice>,
|
||||||
|
merger: MergeFn,
|
||||||
|
) -> Result<Option<DC::DItem>>
|
||||||
|
where
|
||||||
|
K1: Copy + Eq + Hash,
|
||||||
|
KC: BytesEncode<'v>,
|
||||||
|
DC: BytesDecodeOwned,
|
||||||
|
KC::EItem: Sized,
|
||||||
|
{
|
||||||
|
if let Entry::Vacant(entry) = cache.entry(cache_key) {
|
||||||
|
let bitmap_ptr: Option<Cow<'ctx, [u8]>> = match db_keys {
|
||||||
|
[] => None,
|
||||||
|
[key] => db.get(txn, key)?.map(Cow::Borrowed),
|
||||||
|
keys => {
|
||||||
|
let bitmaps = keys
|
||||||
|
.iter()
|
||||||
|
.filter_map(|key| db.get(txn, key).transpose())
|
||||||
|
.map(|v| v.map(Cow::Borrowed))
|
||||||
|
.collect::<std::result::Result<Vec<Cow<[u8]>>, _>>()?;
|
||||||
|
|
||||||
|
if bitmaps.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(merger(&[], &bitmaps[..])?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
entry.insert(bitmap_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
match cache.get(&cache_key).unwrap() {
|
||||||
|
Some(Cow::Borrowed(bytes)) => {
|
||||||
|
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
|
||||||
|
}
|
||||||
|
Some(Cow::Owned(bytes)) => {
|
||||||
|
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
|
||||||
|
}
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'ctx> SearchContext<'ctx> {
|
impl<'ctx> SearchContext<'ctx> {
|
||||||
pub fn get_words_fst(&mut self) -> Result<fst::Set<Cow<'ctx, [u8]>>> {
|
pub fn get_words_fst(&mut self) -> Result<fst::Set<Cow<'ctx, [u8]>>> {
|
||||||
if let Some(fst) = self.db_cache.words_fst.clone() {
|
if let Some(fst) = self.db_cache.words_fst.clone() {
|
||||||
@ -99,30 +154,41 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
|
|
||||||
/// Retrieve or insert the given value in the `word_docids` database.
|
/// Retrieve or insert the given value in the `word_docids` database.
|
||||||
fn get_db_word_docids(&mut self, word: Interned<String>) -> Result<Option<RoaringBitmap>> {
|
fn get_db_word_docids(&mut self, word: Interned<String>) -> Result<Option<RoaringBitmap>> {
|
||||||
DatabaseCache::get_value(
|
match &self.restricted_fids {
|
||||||
self.txn,
|
Some(restricted_fids) => {
|
||||||
word,
|
let interned = self.word_interner.get(word).as_str();
|
||||||
self.word_interner.get(word).as_str(),
|
let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect();
|
||||||
&mut self.db_cache.word_docids,
|
|
||||||
self.index.word_docids.remap_data_type::<ByteSlice>(),
|
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||||
)?
|
self.txn,
|
||||||
.map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
|
word,
|
||||||
.transpose()
|
&keys[..],
|
||||||
|
&mut self.db_cache.word_docids,
|
||||||
|
self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
|
||||||
|
merge_cbo_roaring_bitmaps,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
|
||||||
|
self.txn,
|
||||||
|
word,
|
||||||
|
self.word_interner.get(word).as_str(),
|
||||||
|
&mut self.db_cache.word_docids,
|
||||||
|
self.index.word_docids.remap_data_type::<ByteSlice>(),
|
||||||
|
),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_db_exact_word_docids(
|
fn get_db_exact_word_docids(
|
||||||
&mut self,
|
&mut self,
|
||||||
word: Interned<String>,
|
word: Interned<String>,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
DatabaseCache::get_value(
|
DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
word,
|
word,
|
||||||
self.word_interner.get(word).as_str(),
|
self.word_interner.get(word).as_str(),
|
||||||
&mut self.db_cache.exact_word_docids,
|
&mut self.db_cache.exact_word_docids,
|
||||||
self.index.exact_word_docids.remap_data_type::<ByteSlice>(),
|
self.index.exact_word_docids.remap_data_type::<ByteSlice>(),
|
||||||
)?
|
)
|
||||||
.map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
|
|
||||||
.transpose()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> {
|
pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> {
|
||||||
@ -150,30 +216,41 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
prefix: Interned<String>,
|
prefix: Interned<String>,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
DatabaseCache::get_value(
|
match &self.restricted_fids {
|
||||||
self.txn,
|
Some(restricted_fids) => {
|
||||||
prefix,
|
let interned = self.word_interner.get(prefix).as_str();
|
||||||
self.word_interner.get(prefix).as_str(),
|
let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect();
|
||||||
&mut self.db_cache.word_prefix_docids,
|
|
||||||
self.index.word_prefix_docids.remap_data_type::<ByteSlice>(),
|
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||||
)?
|
self.txn,
|
||||||
.map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
|
prefix,
|
||||||
.transpose()
|
&keys[..],
|
||||||
|
&mut self.db_cache.word_prefix_docids,
|
||||||
|
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
|
||||||
|
merge_cbo_roaring_bitmaps,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
|
||||||
|
self.txn,
|
||||||
|
prefix,
|
||||||
|
self.word_interner.get(prefix).as_str(),
|
||||||
|
&mut self.db_cache.word_prefix_docids,
|
||||||
|
self.index.word_prefix_docids.remap_data_type::<ByteSlice>(),
|
||||||
|
),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_db_exact_word_prefix_docids(
|
fn get_db_exact_word_prefix_docids(
|
||||||
&mut self,
|
&mut self,
|
||||||
prefix: Interned<String>,
|
prefix: Interned<String>,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
DatabaseCache::get_value(
|
DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
prefix,
|
prefix,
|
||||||
self.word_interner.get(prefix).as_str(),
|
self.word_interner.get(prefix).as_str(),
|
||||||
&mut self.db_cache.exact_word_prefix_docids,
|
&mut self.db_cache.exact_word_prefix_docids,
|
||||||
self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(),
|
self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(),
|
||||||
)?
|
)
|
||||||
.map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
|
|
||||||
.transpose()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_db_word_pair_proximity_docids(
|
pub fn get_db_word_pair_proximity_docids(
|
||||||
@ -182,7 +259,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
word2: Interned<String>,
|
word2: Interned<String>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
DatabaseCache::get_value(
|
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(proximity, word1, word2),
|
(proximity, word1, word2),
|
||||||
&(
|
&(
|
||||||
@ -192,9 +269,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
),
|
),
|
||||||
&mut self.db_cache.word_pair_proximity_docids,
|
&mut self.db_cache.word_pair_proximity_docids,
|
||||||
self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
|
self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
|
||||||
)?
|
)
|
||||||
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
|
|
||||||
.transpose()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_db_word_pair_proximity_docids_len(
|
pub fn get_db_word_pair_proximity_docids_len(
|
||||||
@ -203,7 +278,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
word2: Interned<String>,
|
word2: Interned<String>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
) -> Result<Option<u64>> {
|
) -> Result<Option<u64>> {
|
||||||
DatabaseCache::get_value(
|
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(proximity, word1, word2),
|
(proximity, word1, word2),
|
||||||
&(
|
&(
|
||||||
@ -213,11 +288,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
),
|
),
|
||||||
&mut self.db_cache.word_pair_proximity_docids,
|
&mut self.db_cache.word_pair_proximity_docids,
|
||||||
self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
|
self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
|
||||||
)?
|
)
|
||||||
.map(|bytes| {
|
|
||||||
CboRoaringBitmapLenCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())
|
|
||||||
})
|
|
||||||
.transpose()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_db_word_prefix_pair_proximity_docids(
|
pub fn get_db_word_prefix_pair_proximity_docids(
|
||||||
@ -226,7 +297,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
prefix2: Interned<String>,
|
prefix2: Interned<String>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
DatabaseCache::get_value(
|
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(proximity, word1, prefix2),
|
(proximity, word1, prefix2),
|
||||||
&(
|
&(
|
||||||
@ -236,9 +307,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
),
|
),
|
||||||
&mut self.db_cache.word_prefix_pair_proximity_docids,
|
&mut self.db_cache.word_prefix_pair_proximity_docids,
|
||||||
self.index.word_prefix_pair_proximity_docids.remap_data_type::<ByteSlice>(),
|
self.index.word_prefix_pair_proximity_docids.remap_data_type::<ByteSlice>(),
|
||||||
)?
|
)
|
||||||
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
|
|
||||||
.transpose()
|
|
||||||
}
|
}
|
||||||
pub fn get_db_prefix_word_pair_proximity_docids(
|
pub fn get_db_prefix_word_pair_proximity_docids(
|
||||||
&mut self,
|
&mut self,
|
||||||
@ -246,7 +315,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
right: Interned<String>,
|
right: Interned<String>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
DatabaseCache::get_value(
|
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(proximity, left_prefix, right),
|
(proximity, left_prefix, right),
|
||||||
&(
|
&(
|
||||||
@ -256,9 +325,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
),
|
),
|
||||||
&mut self.db_cache.prefix_word_pair_proximity_docids,
|
&mut self.db_cache.prefix_word_pair_proximity_docids,
|
||||||
self.index.prefix_word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
|
self.index.prefix_word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
|
||||||
)?
|
)
|
||||||
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
|
|
||||||
.transpose()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_db_word_fid_docids(
|
pub fn get_db_word_fid_docids(
|
||||||
@ -266,15 +333,18 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
word: Interned<String>,
|
word: Interned<String>,
|
||||||
fid: u16,
|
fid: u16,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
DatabaseCache::get_value(
|
// if the requested fid isn't in the restricted list, return None.
|
||||||
|
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(word, fid),
|
(word, fid),
|
||||||
&(self.word_interner.get(word).as_str(), fid),
|
&(self.word_interner.get(word).as_str(), fid),
|
||||||
&mut self.db_cache.word_fid_docids,
|
&mut self.db_cache.word_fid_docids,
|
||||||
self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
|
self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
|
||||||
)?
|
)
|
||||||
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
|
|
||||||
.transpose()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_db_word_prefix_fid_docids(
|
pub fn get_db_word_prefix_fid_docids(
|
||||||
@ -282,15 +352,18 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
word_prefix: Interned<String>,
|
word_prefix: Interned<String>,
|
||||||
fid: u16,
|
fid: u16,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
DatabaseCache::get_value(
|
// if the requested fid isn't in the restricted list, return None.
|
||||||
|
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(word_prefix, fid),
|
(word_prefix, fid),
|
||||||
&(self.word_interner.get(word_prefix).as_str(), fid),
|
&(self.word_interner.get(word_prefix).as_str(), fid),
|
||||||
&mut self.db_cache.word_prefix_fid_docids,
|
&mut self.db_cache.word_prefix_fid_docids,
|
||||||
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
|
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
|
||||||
)?
|
)
|
||||||
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
|
|
||||||
.transpose()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_db_word_fids(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
|
pub fn get_db_word_fids(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
|
||||||
@ -309,7 +382,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
for result in remap_key_type {
|
for result in remap_key_type {
|
||||||
let ((_, fid), value) = result?;
|
let ((_, fid), value) = result?;
|
||||||
// filling other caches to avoid searching for them again
|
// filling other caches to avoid searching for them again
|
||||||
self.db_cache.word_fid_docids.insert((word, fid), Some(value));
|
self.db_cache.word_fid_docids.insert((word, fid), Some(Cow::Borrowed(value)));
|
||||||
fids.push(fid);
|
fids.push(fid);
|
||||||
}
|
}
|
||||||
entry.insert(fids.clone());
|
entry.insert(fids.clone());
|
||||||
@ -335,7 +408,9 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
for result in remap_key_type {
|
for result in remap_key_type {
|
||||||
let ((_, fid), value) = result?;
|
let ((_, fid), value) = result?;
|
||||||
// filling other caches to avoid searching for them again
|
// filling other caches to avoid searching for them again
|
||||||
self.db_cache.word_prefix_fid_docids.insert((word_prefix, fid), Some(value));
|
self.db_cache
|
||||||
|
.word_prefix_fid_docids
|
||||||
|
.insert((word_prefix, fid), Some(Cow::Borrowed(value)));
|
||||||
fids.push(fid);
|
fids.push(fid);
|
||||||
}
|
}
|
||||||
entry.insert(fids.clone());
|
entry.insert(fids.clone());
|
||||||
@ -350,15 +425,13 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
word: Interned<String>,
|
word: Interned<String>,
|
||||||
position: u16,
|
position: u16,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
DatabaseCache::get_value(
|
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(word, position),
|
(word, position),
|
||||||
&(self.word_interner.get(word).as_str(), position),
|
&(self.word_interner.get(word).as_str(), position),
|
||||||
&mut self.db_cache.word_position_docids,
|
&mut self.db_cache.word_position_docids,
|
||||||
self.index.word_position_docids.remap_data_type::<ByteSlice>(),
|
self.index.word_position_docids.remap_data_type::<ByteSlice>(),
|
||||||
)?
|
)
|
||||||
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
|
|
||||||
.transpose()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_db_word_prefix_position_docids(
|
pub fn get_db_word_prefix_position_docids(
|
||||||
@ -366,15 +439,13 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
word_prefix: Interned<String>,
|
word_prefix: Interned<String>,
|
||||||
position: u16,
|
position: u16,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
DatabaseCache::get_value(
|
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(word_prefix, position),
|
(word_prefix, position),
|
||||||
&(self.word_interner.get(word_prefix).as_str(), position),
|
&(self.word_interner.get(word_prefix).as_str(), position),
|
||||||
&mut self.db_cache.word_prefix_position_docids,
|
&mut self.db_cache.word_prefix_position_docids,
|
||||||
self.index.word_prefix_position_docids.remap_data_type::<ByteSlice>(),
|
self.index.word_prefix_position_docids.remap_data_type::<ByteSlice>(),
|
||||||
)?
|
)
|
||||||
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
|
|
||||||
.transpose()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_db_word_positions(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
|
pub fn get_db_word_positions(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
|
||||||
@ -393,7 +464,9 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
for result in remap_key_type {
|
for result in remap_key_type {
|
||||||
let ((_, position), value) = result?;
|
let ((_, position), value) = result?;
|
||||||
// filling other caches to avoid searching for them again
|
// filling other caches to avoid searching for them again
|
||||||
self.db_cache.word_position_docids.insert((word, position), Some(value));
|
self.db_cache
|
||||||
|
.word_position_docids
|
||||||
|
.insert((word, position), Some(Cow::Borrowed(value)));
|
||||||
positions.push(position);
|
positions.push(position);
|
||||||
}
|
}
|
||||||
entry.insert(positions.clone());
|
entry.insert(positions.clone());
|
||||||
@ -424,7 +497,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
// filling other caches to avoid searching for them again
|
// filling other caches to avoid searching for them again
|
||||||
self.db_cache
|
self.db_cache
|
||||||
.word_prefix_position_docids
|
.word_prefix_position_docids
|
||||||
.insert((word_prefix, position), Some(value));
|
.insert((word_prefix, position), Some(Cow::Borrowed(value)));
|
||||||
positions.push(position);
|
positions.push(position);
|
||||||
}
|
}
|
||||||
entry.insert(positions.clone());
|
entry.insert(positions.clone());
|
||||||
|
@ -20,7 +20,7 @@ mod sort;
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests;
|
mod tests;
|
||||||
|
|
||||||
use std::collections::HashSet;
|
use std::collections::{BTreeSet, HashSet};
|
||||||
|
|
||||||
use bucket_sort::{bucket_sort, BucketSortOutput};
|
use bucket_sort::{bucket_sort, BucketSortOutput};
|
||||||
use charabia::TokenizerBuilder;
|
use charabia::TokenizerBuilder;
|
||||||
@ -46,6 +46,7 @@ use self::geo_sort::GeoSort;
|
|||||||
pub use self::geo_sort::Strategy as GeoSortStrategy;
|
pub use self::geo_sort::Strategy as GeoSortStrategy;
|
||||||
use self::graph_based_ranking_rule::Words;
|
use self::graph_based_ranking_rule::Words;
|
||||||
use self::interner::Interned;
|
use self::interner::Interned;
|
||||||
|
use crate::error::FieldIdMapMissingEntry;
|
||||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||||
use crate::search::new::distinct::apply_distinct_rule;
|
use crate::search::new::distinct::apply_distinct_rule;
|
||||||
use crate::{
|
use crate::{
|
||||||
@ -62,6 +63,7 @@ pub struct SearchContext<'ctx> {
|
|||||||
pub phrase_interner: DedupInterner<Phrase>,
|
pub phrase_interner: DedupInterner<Phrase>,
|
||||||
pub term_interner: Interner<QueryTerm>,
|
pub term_interner: Interner<QueryTerm>,
|
||||||
pub phrase_docids: PhraseDocIdsCache,
|
pub phrase_docids: PhraseDocIdsCache,
|
||||||
|
pub restricted_fids: Option<Vec<u16>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'ctx> SearchContext<'ctx> {
|
impl<'ctx> SearchContext<'ctx> {
|
||||||
@ -74,8 +76,66 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
phrase_interner: <_>::default(),
|
phrase_interner: <_>::default(),
|
||||||
term_interner: <_>::default(),
|
term_interner: <_>::default(),
|
||||||
phrase_docids: <_>::default(),
|
phrase_docids: <_>::default(),
|
||||||
|
restricted_fids: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> {
|
||||||
|
let fids_map = self.index.fields_ids_map(self.txn)?;
|
||||||
|
let searchable_names = self.index.searchable_fields(self.txn)?;
|
||||||
|
|
||||||
|
let mut restricted_fids = Vec::new();
|
||||||
|
for field_name in searchable_attributes {
|
||||||
|
let searchable_contains_name =
|
||||||
|
searchable_names.as_ref().map(|sn| sn.iter().any(|name| name == field_name));
|
||||||
|
let fid = match (fids_map.id(field_name), searchable_contains_name) {
|
||||||
|
// The Field id exist and the field is searchable
|
||||||
|
(Some(fid), Some(true)) | (Some(fid), None) => fid,
|
||||||
|
// The field is searchable but the Field id doesn't exist => Internal Error
|
||||||
|
(None, Some(true)) => {
|
||||||
|
return Err(FieldIdMapMissingEntry::FieldName {
|
||||||
|
field_name: field_name.to_string(),
|
||||||
|
process: "search",
|
||||||
|
}
|
||||||
|
.into())
|
||||||
|
}
|
||||||
|
// The field is not searchable => User error
|
||||||
|
_otherwise => {
|
||||||
|
let mut valid_fields: BTreeSet<_> =
|
||||||
|
fids_map.names().map(String::from).collect();
|
||||||
|
|
||||||
|
// Filter by the searchable names
|
||||||
|
if let Some(sn) = searchable_names {
|
||||||
|
let searchable_names = sn.iter().map(|s| s.to_string()).collect();
|
||||||
|
valid_fields = &valid_fields & &searchable_names;
|
||||||
|
}
|
||||||
|
|
||||||
|
let searchable_count = valid_fields.len();
|
||||||
|
|
||||||
|
// Remove hidden fields
|
||||||
|
if let Some(dn) = self.index.displayed_fields(self.txn)? {
|
||||||
|
let displayable_names = dn.iter().map(|s| s.to_string()).collect();
|
||||||
|
valid_fields = &valid_fields & &displayable_names;
|
||||||
|
}
|
||||||
|
|
||||||
|
let hidden_fields = searchable_count > valid_fields.len();
|
||||||
|
let field = field_name.to_string();
|
||||||
|
return Err(UserError::InvalidSearchableAttribute {
|
||||||
|
field,
|
||||||
|
valid_fields,
|
||||||
|
hidden_fields,
|
||||||
|
}
|
||||||
|
.into());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
restricted_fids.push(fid);
|
||||||
|
}
|
||||||
|
|
||||||
|
self.restricted_fids = Some(restricted_fids);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)]
|
#[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)]
|
||||||
|
@ -4,7 +4,8 @@ pub use self::delete_documents::{DeleteDocuments, DeletionStrategy, DocumentDele
|
|||||||
pub use self::facet::bulk::FacetsUpdateBulk;
|
pub use self::facet::bulk::FacetsUpdateBulk;
|
||||||
pub use self::facet::incremental::FacetsUpdateIncrementalInner;
|
pub use self::facet::incremental::FacetsUpdateIncrementalInner;
|
||||||
pub use self::index_documents::{
|
pub use self::index_documents::{
|
||||||
DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,
|
merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, DocumentAdditionResult, DocumentId,
|
||||||
|
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, MergeFn,
|
||||||
};
|
};
|
||||||
pub use self::indexer_config::IndexerConfig;
|
pub use self::indexer_config::IndexerConfig;
|
||||||
pub use self::prefix_word_pairs::{
|
pub use self::prefix_word_pairs::{
|
||||||
|
Loading…
Reference in New Issue
Block a user