Mirror of https://github.com/meilisearch/MeiliSearch (synced 2024-11-25 22:34:28 +01:00)

Merge #4888: bring back v1.10.0 into main (r=Kerollmops, a=ManyTheFish)

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>

This commit is contained in: commit 9a756cf2c5
@@ -108,8 +108,10 @@ pub struct IndexStats {
     /// Association of every field name with the number of times it occurs in the documents.
     pub field_distribution: FieldDistribution,
     /// Creation date of the index.
+    #[serde(with = "time::serde::rfc3339")]
     pub created_at: OffsetDateTime,
     /// Date of the last update of the index.
+    #[serde(with = "time::serde::rfc3339")]
     pub updated_at: OffsetDateTime,
 }
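A quick illustration (not part of the diff) of what the added attribute changes: `time::serde::rfc3339` makes serde read and write the field as an RFC 3339 string. The struct below is a stand-in for `IndexStats`, and the `time` crate needs its `serde-well-known` feature for this helper.

```rust
use serde::Serialize;
use time::OffsetDateTime;

// Stand-in struct: only the timestamp field matters here.
#[derive(Serialize)]
struct Stats {
    #[serde(with = "time::serde::rfc3339")]
    created_at: OffsetDateTime,
}

fn main() {
    let stats = Stats { created_at: OffsetDateTime::UNIX_EPOCH };
    // Prints {"created_at":"1970-01-01T00:00:00Z"} instead of time's default representation.
    println!("{}", serde_json::to_string(&stats).unwrap());
}
```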
@@ -72,6 +72,19 @@ fn on_panic(info: &std::panic::PanicInfo) {
 
 #[actix_web::main]
 async fn main() -> anyhow::Result<()> {
+    try_main().await.inspect_err(|error| {
+        tracing::error!(%error);
+        let mut current = error.source();
+        let mut depth = 0;
+        while let Some(source) = current {
+            tracing::info!(%source, depth, "Error caused by");
+            current = source.source();
+            depth += 1;
+        }
+    })
+}
+
+async fn try_main() -> anyhow::Result<()> {
     let (opt, config_read_from) = Opt::try_build()?;
 
     std::panic::set_hook(Box::new(on_panic));
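A small standalone sketch (not from the diff) of the pattern used above: walking an error's `source()` chain and logging each cause with its depth. Plain `std` errors stand in for `anyhow` and `tracing` here.

```rust
use std::error::Error;
use std::fmt;

#[derive(Debug)]
struct Wrapper { cause: std::io::Error }

impl fmt::Display for Wrapper {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "failed to start")
    }
}

impl Error for Wrapper {
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        Some(&self.cause)
    }
}

fn main() {
    let error = Wrapper { cause: std::io::Error::new(std::io::ErrorKind::Other, "disk full") };
    eprintln!("error: {error}");
    // Same traversal as in try_main's inspect_err handler.
    let mut current = error.source();
    let mut depth = 0;
    while let Some(source) = current {
        eprintln!("caused by (depth {depth}): {source}");
        current = source.source();
        depth += 1;
    }
}
```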
@@ -682,6 +682,7 @@ generate_configure!(
     filterable_attributes,
     sortable_attributes,
     displayed_attributes,
+    localized_attributes,
     searchable_attributes,
     distinct_attribute,
     proximity_precision,
@@ -1369,12 +1369,18 @@ pub fn perform_facet_search(
         None => TimeBudget::default(),
     };

+    // In the faceted search context, we want to use the intersection between the locales provided by the user
+    // and the locales of the facet string.
+    // If the facet string is not localized, we **ignore** the locales provided by the user because the facet data has no locale.
+    // If the user does not provide locales, we use the locales of the facet string.
     let localized_attributes = index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
-    let locales = locales.or_else(|| {
-        localized_attributes
+    let localized_attributes_locales =
+        localized_attributes.into_iter().find(|attr| attr.match_str(&facet_name));
+    let locales = localized_attributes_locales.map(|attr| {
+        attr.locales
             .into_iter()
-            .find(|attr| attr.match_str(&facet_name))
-            .map(|attr| attr.locales)
+            .filter(|locale| locales.as_ref().map_or(true, |locales| locales.contains(locale)))
+            .collect()
     });

     let (search, _, _, _) =
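As a reading aid (illustrative, not part of the diff), a minimal sketch of the intersection rule described in the comments above, using plain strings instead of Meilisearch's `Language` type:

```rust
/// Illustrative only: pick the locales used for a facet, given the locales
/// configured on the facet and the locales the user passed with the query.
fn effective_locales<'a>(
    facet_locales: &'a [&'a str],
    user_locales: Option<&'a [&'a str]>,
) -> Vec<&'a str> {
    facet_locales
        .iter()
        .copied()
        // keep a facet locale only if the user either gave no list or listed it
        .filter(|locale| user_locales.map_or(true, |user| user.contains(locale)))
        .collect()
}

fn main() {
    // The user narrows the facet's locales: intersection.
    assert_eq!(effective_locales(&["jpn", "cmn"], Some(&["cmn"][..])), vec!["cmn"]);
    // No user locales: fall back to the facet's locales.
    assert_eq!(effective_locales(&["jpn", "cmn"], None), vec!["jpn", "cmn"]);
}
```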
@@ -386,12 +386,39 @@ async fn force_locales() {
         |response, code| {
             snapshot!(response, @r###"
             {
-              "hits": [],
+              "hits": [
+                {
+                  "name_zh": "进击的巨人",
+                  "author_zh": "諫山創",
+                  "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+                  "id": 853,
+                  "_vectors": {
+                    "manual": [
+                      1.0,
+                      2.0,
+                      3.0
+                    ]
+                  },
+                  "_formatted": {
+                    "name_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>",
+                    "author_zh": "諫山創",
+                    "description_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>是日本的漫画系列,由諫山 創作画。",
+                    "id": "853",
+                    "_vectors": {
+                      "manual": [
+                        "1.0",
+                        "2.0",
+                        "3.0"
+                      ]
+                    }
+                  }
+                }
+              ],
               "query": "\"进击的巨人\"",
               "processingTimeMs": "[duration]",
               "limit": 20,
               "offset": 0,
-              "estimatedTotalHits": 0
+              "estimatedTotalHits": 1
             }
             "###);
             snapshot!(code, @"200 OK");
@@ -483,12 +510,39 @@ async fn force_locales_with_pattern() {
         |response, code| {
             snapshot!(response, @r###"
             {
-              "hits": [],
+              "hits": [
+                {
+                  "name_zh": "进击的巨人",
+                  "author_zh": "諫山創",
+                  "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+                  "id": 853,
+                  "_vectors": {
+                    "manual": [
+                      1.0,
+                      2.0,
+                      3.0
+                    ]
+                  },
+                  "_formatted": {
+                    "name_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>",
+                    "author_zh": "諫山創",
+                    "description_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>是日本的漫画系列,由諫山 創作画。",
+                    "id": "853",
+                    "_vectors": {
+                      "manual": [
+                        "1.0",
+                        "2.0",
+                        "3.0"
+                      ]
+                    }
+                  }
+                }
+              ],
               "query": "\"进击的巨人\"",
               "processingTimeMs": "[duration]",
               "limit": 20,
               "offset": 0,
-              "estimatedTotalHits": 0
+              "estimatedTotalHits": 1
             }
             "###);
             snapshot!(code, @"200 OK");
@@ -761,6 +815,275 @@ async fn force_different_locales_with_pattern() {
         .await;
 }
 
+#[actix_rt::test]
+async fn auto_infer_locales_at_search_with_attributes_to_search_on() {
+    let server = Server::new().await;
+
+    let index = server.index("test");
+    let documents = DOCUMENTS.clone();
+    let (response, _) = index
+        .update_settings(
+            json!({
+                "searchableAttributes": ["name_en", "name_ja", "name_zh", "author_en", "author_ja", "author_zh", "description_en", "description_ja", "description_zh"],
+                "localizedAttributes": [
+                    // force japanese
+                    {"attributePatterns": ["*_zh"], "locales": ["jpn"]},
+                    // force chinese
+                    {"attributePatterns": ["*_ja"], "locales": ["cmn"]},
+                    // any language
+                    {"attributePatterns": ["*_en"], "locales": []}
+                ]
+            }),
+        )
+        .await;
+    snapshot!(response, @r###"
+    {
+      "taskUid": 0,
+      "indexUid": "test",
+      "status": "enqueued",
+      "type": "settingsUpdate",
+      "enqueuedAt": "[date]"
+    }
+    "###);
+    index.add_documents(documents, None).await;
+    index.wait_task(1).await;
+
+    // auto infer any language
+    index
+        .search(
+            json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}),
+            |response, code| {
+                snapshot!(response, @r###"
+                {
+                  "hits": [],
+                  "query": "\"进击的巨人\"",
+                  "processingTimeMs": "[duration]",
+                  "limit": 20,
+                  "offset": 0,
+                  "estimatedTotalHits": 0
+                }
+                "###);
+                snapshot!(code, @"200 OK");
+            },
+        )
+        .await;
+
+    // should infer chinese
+    index
+        .search(
+            json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"], "attributesToSearchOn": ["name_zh", "description_zh"]}),
+            |response, code| {
+                snapshot!(response, @r###"
+                {
+                  "hits": [
+                    {
+                      "name_zh": "进击的巨人",
+                      "author_zh": "諫山創",
+                      "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+                      "id": 853,
+                      "_vectors": {
+                        "manual": [
+                          1.0,
+                          2.0,
+                          3.0
+                        ]
+                      },
+                      "_formatted": {
+                        "name_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>",
+                        "author_zh": "諫山創",
+                        "description_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>是日本的漫画系列,由諫山 創作画。",
+                        "id": "853",
+                        "_vectors": {
+                          "manual": [
+                            "1.0",
+                            "2.0",
+                            "3.0"
+                          ]
+                        }
+                      }
+                    }
+                  ],
+                  "query": "\"进击的巨人\"",
+                  "processingTimeMs": "[duration]",
+                  "limit": 20,
+                  "offset": 0,
+                  "estimatedTotalHits": 1
+                }
+                "###);
+                snapshot!(code, @"200 OK");
+            },
+        )
+        .await;
+}
+
+#[actix_rt::test]
+async fn auto_infer_locales_at_search() {
+    let server = Server::new().await;
+
+    let index = server.index("test");
+    let documents = DOCUMENTS.clone();
+    let (response, _) = index
+        .update_settings(
+            json!({
+                "searchableAttributes": ["name_en", "name_ja", "name_zh", "author_en", "author_ja", "author_zh", "description_en", "description_ja", "description_zh"],
+                "localizedAttributes": [
+                    // force japanese
+                    {"attributePatterns": ["*"], "locales": ["jpn"]},
+                ]
+            }),
+        )
+        .await;
+    snapshot!(response, @r###"
+    {
+      "taskUid": 0,
+      "indexUid": "test",
+      "status": "enqueued",
+      "type": "settingsUpdate",
+      "enqueuedAt": "[date]"
+    }
+    "###);
+    index.add_documents(documents, None).await;
+    index.wait_task(1).await;
+
+    index
+        .search(
+            json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}),
+            |response, code| {
+                snapshot!(response, @r###"
+                {
+                  "hits": [
+                    {
+                      "name_zh": "进击的巨人",
+                      "author_zh": "諫山創",
+                      "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+                      "id": 853,
+                      "_vectors": {
+                        "manual": [
+                          1.0,
+                          2.0,
+                          3.0
+                        ]
+                      },
+                      "_formatted": {
+                        "name_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>",
+                        "author_zh": "諫山創",
+                        "description_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>是日本的漫画系列,由諫山 創作画。",
+                        "id": "853",
+                        "_vectors": {
+                          "manual": [
+                            "1.0",
+                            "2.0",
+                            "3.0"
+                          ]
+                        }
+                      }
+                    }
+                  ],
+                  "query": "\"进击的巨人\"",
+                  "processingTimeMs": "[duration]",
+                  "limit": 20,
+                  "offset": 0,
+                  "estimatedTotalHits": 1
+                }
+                "###);
+                snapshot!(code, @"200 OK");
+            },
+        )
+        .await;
+
+    index
+        .search(
+            json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}),
+            |response, code| {
+                snapshot!(response, @r###"
+                {
+                  "hits": [
+                    {
+                      "name_zh": "进击的巨人",
+                      "author_zh": "諫山創",
+                      "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+                      "id": 853,
+                      "_vectors": {
+                        "manual": [
+                          1.0,
+                          2.0,
+                          3.0
+                        ]
+                      },
+                      "_formatted": {
+                        "name_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>",
+                        "author_zh": "諫山創",
+                        "description_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>是日本的漫画系列,由諫山 創作画。",
+                        "id": "853",
+                        "_vectors": {
+                          "manual": [
+                            "1.0",
+                            "2.0",
+                            "3.0"
+                          ]
+                        }
+                      }
+                    }
+                  ],
+                  "query": "\"进击的巨人\"",
+                  "processingTimeMs": "[duration]",
+                  "limit": 20,
+                  "offset": 0,
+                  "estimatedTotalHits": 1
+                }
+                "###);
+                snapshot!(code, @"200 OK");
+            },
+        )
+        .await;
+
+    index
+        .search(
+            json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}),
+            |response, code| {
+                snapshot!(response, @r###"
+                {
+                  "hits": [
+                    {
+                      "name_zh": "进击的巨人",
+                      "author_zh": "諫山創",
+                      "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+                      "id": 853,
+                      "_vectors": {
+                        "manual": [
+                          1.0,
+                          2.0,
+                          3.0
+                        ]
+                      },
+                      "_formatted": {
+                        "name_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>",
+                        "author_zh": "諫山創",
+                        "description_zh": "<em>进</em><em>击</em><em>的</em><em>巨人</em>是日本的漫画系列,由諫山 創作画。",
+                        "id": "853",
+                        "_vectors": {
+                          "manual": [
+                            "1.0",
+                            "2.0",
+                            "3.0"
+                          ]
+                        }
+                      }
+                    }
+                  ],
+                  "query": "\"进击的巨人\"",
+                  "processingTimeMs": "[duration]",
+                  "limit": 20,
+                  "offset": 0,
+                  "estimatedTotalHits": 1
+                }
+                "###);
+                snapshot!(code, @"200 OK");
+            },
+        )
+        .await;
+}
 
 #[actix_rt::test]
 async fn force_different_locales_with_pattern_nested() {
     let server = Server::new().await;
@@ -7,6 +7,7 @@ mod facet_search;
 mod formatted;
 mod geo;
 mod hybrid;
+#[cfg(not(feature = "chinese-pinyin"))]
 mod locales;
 mod matching_strategy;
 mod multi;
@@ -169,6 +170,7 @@ async fn negative_special_cases_search() {
 }
 
 #[cfg(feature = "default")]
+#[cfg(not(feature = "chinese-pinyin"))]
 #[actix_rt::test]
 async fn test_kanji_language_detection() {
     let server = Server::new().await;
@@ -2,7 +2,7 @@
 source: meilisearch/tests/search/errors.rs
 ---
 {
-  "uid": 0,
+  "uid": "[uid]",
   "indexUid": "tamo",
   "status": "succeeded",
   "type": "indexCreation",
@@ -9,6 +9,7 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
     let mut map = HashMap::new();
     map.insert("displayed_attributes", json!(["*"]));
     map.insert("searchable_attributes", json!(["*"]));
+    map.insert("localized_attributes", json!(null));
     map.insert("filterable_attributes", json!([]));
     map.insert("distinct_attribute", json!(null));
     map.insert(
@@ -409,6 +410,7 @@ macro_rules! test_setting_routes {
 test_setting_routes!(
     filterable_attributes put,
     displayed_attributes put,
+    localized_attributes put,
     searchable_attributes put,
     distinct_attribute put,
     stop_words put,
BIN meilisearch/tests/vector/intel_gen.txt.gz (new file, binary file not shown)
@@ -1,3 +1,4 @@
+mod openai;
 mod rest;
 mod settings;
 
@@ -10,6 +11,22 @@ use crate::common::index::Index;
 use crate::common::{default_settings, GetAllDocumentsOptions, Server};
 use crate::json;
 
+async fn get_server_vector() -> Server {
+    let server = Server::new().await;
+    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(value, @r###"
+    {
+      "vectorStore": true,
+      "metrics": false,
+      "logsRoute": false,
+      "editDocumentsByFunction": false,
+      "containsFilter": false
+    }
+    "###);
+    server
+}
+
 #[actix_rt::test]
 async fn add_remove_user_provided() {
     let server = Server::new().await;
meilisearch/tests/vector/openai.rs (new file, 1873 lines — file diff suppressed because it is too large)
BIN meilisearch/tests/vector/openai_responses.json.gz (new file, binary file not shown)
BIN meilisearch/tests/vector/openai_tokenized_responses.json.gz (new file, binary file not shown)
@@ -5,9 +5,9 @@ use reqwest::IntoUrl;
 use wiremock::matchers::{method, path};
 use wiremock::{Mock, MockServer, Request, ResponseTemplate};
 
-use crate::common::{Server, Value};
+use crate::common::Value;
 use crate::json;
-use crate::vector::GetAllDocumentsOptions;
+use crate::vector::{get_server_vector, GetAllDocumentsOptions};
 
 async fn create_mock() -> (MockServer, Value) {
     let mock_server = MockServer::start().await;
@@ -265,22 +265,6 @@ async fn dummy_testing_the_mock() {
     snapshot!(body, @r###"{"data":[4,4,4]}"###);
 }
 
-async fn get_server_vector() -> Server {
-    let server = Server::new().await;
-    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
-    snapshot!(code, @"200 OK");
-    snapshot!(value, @r###"
-    {
-      "vectorStore": true,
-      "metrics": false,
-      "logsRoute": false,
-      "editDocumentsByFunction": false,
-      "containsFilter": false
-    }
-    "###);
-    server
-}
-
 #[actix_rt::test]
 async fn bad_request() {
     let (mock, _setting) = create_mock().await;
@@ -1816,7 +1800,7 @@ async fn server_custom_header() {
       }
     },
     "error": {
-      "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"missing header 'my-nonstandard-auth'\"}`",
+      "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"missing header 'my-nonstandard-auth'\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration",
       "code": "vector_embedding_error",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -1858,7 +1842,7 @@ async fn server_custom_header() {
       }
     },
     "error": {
-      "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"thou shall not pass, Balrog\"}`",
+      "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"thou shall not pass, Balrog\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration",
       "code": "vector_embedding_error",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -9,7 +9,6 @@ use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
 use roaring::RoaringBitmap;
 use rstar::RTree;
 use serde::{Deserialize, Serialize};
-use time::OffsetDateTime;
 
 use crate::documents::PrimaryKey;
 use crate::error::{InternalError, UserError};
@@ -173,8 +172,8 @@ impl Index {
     pub fn new_with_creation_dates<P: AsRef<Path>>(
         mut options: heed::EnvOpenOptions,
         path: P,
-        created_at: OffsetDateTime,
-        updated_at: OffsetDateTime,
+        created_at: time::OffsetDateTime,
+        updated_at: time::OffsetDateTime,
     ) -> Result<Index> {
         use db_name::*;
 
@@ -256,22 +255,22 @@ impl Index {
     }
 
     pub fn new<P: AsRef<Path>>(options: heed::EnvOpenOptions, path: P) -> Result<Index> {
-        let now = OffsetDateTime::now_utc();
+        let now = time::OffsetDateTime::now_utc();
         Self::new_with_creation_dates(options, path, now, now)
     }
 
     fn set_creation_dates(
         env: &heed::Env,
         main: Database<Unspecified, Unspecified>,
-        created_at: OffsetDateTime,
-        updated_at: OffsetDateTime,
+        created_at: time::OffsetDateTime,
+        updated_at: time::OffsetDateTime,
     ) -> heed::Result<()> {
         let mut txn = env.write_txn()?;
         // The db was just created, we update its metadata with the relevant information.
         let main = main.remap_types::<Str, SerdeJson<OffsetDateTime>>();
         if main.get(&txn, main_key::CREATED_AT_KEY)?.is_none() {
-            main.put(&mut txn, main_key::UPDATED_AT_KEY, &updated_at)?;
-            main.put(&mut txn, main_key::CREATED_AT_KEY, &created_at)?;
+            main.put(&mut txn, main_key::UPDATED_AT_KEY, &OffsetDateTime(updated_at))?;
+            main.put(&mut txn, main_key::CREATED_AT_KEY, &OffsetDateTime(created_at))?;
             txn.commit()?;
         }
         Ok(())
@@ -371,7 +370,7 @@ impl Index {
         wtxn: &mut RwTxn<'_>,
         primary_key: &str,
     ) -> heed::Result<()> {
-        self.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
+        self.set_updated_at(wtxn, &time::OffsetDateTime::now_utc())?;
         self.main.remap_types::<Str, Str>().put(wtxn, main_key::PRIMARY_KEY_KEY, primary_key)
     }
 
@@ -1323,7 +1322,7 @@ impl Index {
     }
 
     /// Returns the index creation time.
-    pub fn created_at(&self, rtxn: &RoTxn<'_>) -> Result<OffsetDateTime> {
+    pub fn created_at(&self, rtxn: &RoTxn<'_>) -> Result<time::OffsetDateTime> {
         Ok(self
             .main
             .remap_types::<Str, SerdeJson<OffsetDateTime>>()
@@ -1331,11 +1330,12 @@ impl Index {
             .ok_or(InternalError::DatabaseMissingEntry {
                 db_name: db_name::MAIN,
                 key: Some(main_key::CREATED_AT_KEY),
-            })?)
+            })?
+            .0)
     }
 
     /// Returns the index last updated time.
-    pub fn updated_at(&self, rtxn: &RoTxn<'_>) -> Result<OffsetDateTime> {
+    pub fn updated_at(&self, rtxn: &RoTxn<'_>) -> Result<time::OffsetDateTime> {
         Ok(self
             .main
             .remap_types::<Str, SerdeJson<OffsetDateTime>>()
@@ -1343,18 +1343,19 @@ impl Index {
             .ok_or(InternalError::DatabaseMissingEntry {
                 db_name: db_name::MAIN,
                 key: Some(main_key::UPDATED_AT_KEY),
-            })?)
+            })?
+            .0)
     }
 
     pub(crate) fn set_updated_at(
         &self,
         wtxn: &mut RwTxn<'_>,
-        time: &OffsetDateTime,
+        time: &time::OffsetDateTime,
     ) -> heed::Result<()> {
         self.main.remap_types::<Str, SerdeJson<OffsetDateTime>>().put(
             wtxn,
             main_key::UPDATED_AT_KEY,
-            time,
+            &OffsetDateTime(*time),
         )
     }
 
@@ -1681,6 +1682,10 @@ pub struct IndexEmbeddingConfig {
     pub user_provided: RoaringBitmap,
 }
 
+#[derive(Serialize, Deserialize)]
+#[serde(transparent)]
+struct OffsetDateTime(#[serde(with = "time::serde::rfc3339")] time::OffsetDateTime);
+
 #[cfg(test)]
 pub(crate) mod tests {
     use std::collections::HashSet;
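A brief aside (not from the diff): the `OffsetDateTime` struct added above is a transparent newtype that forces RFC 3339 (de)serialization onto a third-party type. A minimal sketch of the same idea, with `serde_json` standing in for heed's `SerdeJson` codec and the `time` crate's `serde-well-known` feature assumed:

```rust
use serde::{Deserialize, Serialize};

// Transparent wrapper: serializes exactly like the inner value,
// but routes it through the RFC 3339 helpers of the `time` crate.
#[derive(Serialize, Deserialize)]
#[serde(transparent)]
struct Rfc3339(#[serde(with = "time::serde::rfc3339")] time::OffsetDateTime);

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let stored = Rfc3339(time::OffsetDateTime::UNIX_EPOCH);
    // Stored as a plain RFC 3339 string: "1970-01-01T00:00:00Z".
    let json = serde_json::to_string(&stored)?;
    let Rfc3339(back) = serde_json::from_str(&json)?;
    assert_eq!(back, stored.0);
    Ok(())
}
```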
@@ -90,6 +90,21 @@ impl LocalizedFieldIds {
     pub fn locales(&self, fields_id: FieldId) -> Option<&[Language]> {
         self.field_id_to_locales.get(&fields_id).map(Vec::as_slice)
     }
+
+    pub fn all_locales(&self) -> Vec<Language> {
+        let mut locales = Vec::new();
+        for field_locales in self.field_id_to_locales.values() {
+            if !field_locales.is_empty() {
+                locales.extend(field_locales);
+            } else {
+                // If a field has no locales, we consider it as not localized
+                return Vec::new();
+            }
+        }
+        locales.sort();
+        locales.dedup();
+        locales
+    }
 }
 
 #[cfg(test)]
@@ -339,10 +339,18 @@ impl ValuesCollection {
 fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String {
     let options = NormalizerOption { lossy: true, ..Default::default() };
     let mut detection = StrDetection::new(facet_string, locales);
 
+    // Detect the language of the facet string only if several locales are explicitly provided.
+    let language = match locales {
+        Some(&[language]) => Some(language),
+        Some(multiple_locales) if multiple_locales.len() > 1 => detection.language(),
+        _ => None,
+    };
+
     let token = Token {
         lemma: std::borrow::Cow::Borrowed(facet_string),
         script: detection.script(),
-        language: detection.language(),
+        language,
         ..Default::default()
     };
 
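As an aside (illustrative, not from the diff), the `match` above relies on slice patterns over `Option<&[T]>`: one explicit locale is trusted, several trigger detection, none leaves the language undetermined. A minimal standalone sketch of the same dispatch, with strings in place of `charabia` locales and a stubbed detector:

```rust
/// Illustrative only: pick a language the way the facet normalizer does.
/// `detect` stands in for charabia's detector and is only consulted when the
/// caller provided several candidate locales.
fn pick_language<'a>(
    locales: Option<&'a [&'a str]>,
    detect: impl Fn() -> &'a str,
) -> Option<&'a str> {
    match locales {
        // exactly one locale: trust it, no detection needed
        Some(&[language]) => Some(language),
        // several candidates: ask the detector to disambiguate
        Some(multiple) if multiple.len() > 1 => Some(detect()),
        // no locales (or an empty list): leave the language undetermined
        _ => None,
    }
}

fn main() {
    assert_eq!(pick_language(Some(&["jpn"][..]), || "cmn"), Some("jpn"));
    assert_eq!(pick_language(Some(&["jpn", "cmn"][..]), || "cmn"), Some("cmn"));
    assert_eq!(pick_language(None, || "cmn"), None);
}
```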
@@ -360,6 +360,7 @@ mod test {
     use super::*;
 
     #[cfg(feature = "japanese")]
+    #[cfg(not(feature = "chinese-pinyin"))]
     #[test]
     fn test_kanji_language_detection() {
         use crate::index::tests::TempIndex;
@@ -110,18 +110,18 @@ impl<'ctx> DatabaseCache<'ctx> {
             .map_err(Into::into)
     }
 
-    fn get_value_from_keys<'v, K1, KC, DC>(
+    fn get_value_from_keys<'v, K1, KC>(
         txn: &'ctx RoTxn<'_>,
         cache_key: K1,
         db_keys: &'v [KC::EItem],
         cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
         db: Database<KC, Bytes>,
+        universe: Option<&RoaringBitmap>,
         merger: MergeFn,
-    ) -> Result<Option<DC::DItem>>
+    ) -> Result<Option<RoaringBitmap>>
     where
         K1: Copy + Eq + Hash,
         KC: BytesEncode<'v>,
-        DC: BytesDecodeOwned,
         KC::EItem: Sized,
     {
         if let Entry::Vacant(entry) = cache.entry(cache_key) {
@@ -146,16 +146,22 @@ impl<'ctx> DatabaseCache<'ctx> {
             entry.insert(bitmap_ptr);
         }
 
-        match cache.get(&cache_key).unwrap() {
-            Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes)
+        let bitmap_bytes = match cache.get(&cache_key).unwrap() {
+            Some(Cow::Borrowed(bytes)) => bytes,
+            Some(Cow::Owned(bytes)) => bytes.as_slice(),
+            None => return Ok(None),
+        };
+
+        match (bitmap_bytes, universe) {
+            (bytes, Some(universe)) => {
+                CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
+                    .map(Some)
+                    .map_err(Into::into)
+            }
+            (bytes, None) => CboRoaringBitmapCodec::bytes_decode_owned(bytes)
                 .map(Some)
                 .map_err(heed::Error::Decoding)
                 .map_err(Into::into),
-            Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes)
-                .map(Some)
-                .map_err(heed::Error::Decoding)
-                .map_err(Into::into),
-            None => Ok(None),
         }
     }
 }
@@ -207,12 +213,13 @@ impl<'ctx> SearchContext<'ctx> {
                 let keys: Vec<_> =
                     restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
 
-                DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
+                DatabaseCache::get_value_from_keys::<_, _>(
                     self.txn,
                     word,
                     &keys[..],
                     &mut self.db_cache.word_docids,
                     self.index.word_fid_docids.remap_data_type::<Bytes>(),
+                    universe,
                     merge_cbo_roaring_bitmaps,
                 )
             }
@@ -238,12 +245,13 @@ impl<'ctx> SearchContext<'ctx> {
                 let keys: Vec<_> =
                     restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();
 
-                DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
+                DatabaseCache::get_value_from_keys::<_, _>(
                     self.txn,
                     word,
                     &keys[..],
                     &mut self.db_cache.exact_word_docids,
                     self.index.word_fid_docids.remap_data_type::<Bytes>(),
+                    universe,
                     merge_cbo_roaring_bitmaps,
                 )
             }
@@ -294,12 +302,13 @@ impl<'ctx> SearchContext<'ctx> {
                 let keys: Vec<_> =
                     restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
 
-                DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
+                DatabaseCache::get_value_from_keys::<_, _>(
                     self.txn,
                     prefix,
                     &keys[..],
                     &mut self.db_cache.word_prefix_docids,
                     self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
+                    universe,
                     merge_cbo_roaring_bitmaps,
                 )
             }
@@ -325,12 +334,13 @@ impl<'ctx> SearchContext<'ctx> {
                 let keys: Vec<_> =
                     restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();
 
-                DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
+                DatabaseCache::get_value_from_keys::<_, _>(
                     self.txn,
                     prefix,
                     &keys[..],
                     &mut self.db_cache.exact_word_prefix_docids,
                     self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
+                    universe,
                     merge_cbo_roaring_bitmaps,
                 )
             }
@@ -49,6 +49,7 @@ pub use self::geo_sort::Strategy as GeoSortStrategy;
 use self::graph_based_ranking_rule::Words;
 use self::interner::Interned;
 use self::vector_sort::VectorSort;
+use crate::localized_attributes_rules::LocalizedFieldIds;
 use crate::score_details::{ScoreDetails, ScoringStrategy};
 use crate::search::new::distinct::apply_distinct_rule;
 use crate::vector::Embedder;
@@ -671,9 +672,44 @@ pub fn execute_search(
         tokbuilder.words_dict(dictionary);
     }
 
-    if let Some(locales) = locales {
+    let db_locales;
+    match locales {
+        Some(locales) => {
+            if !locales.is_empty() {
                 tokbuilder.allow_list(locales);
             }
+        }
+        None => {
+            // If no locales are specified, we use the locales specified in the localized attributes rules
+            let localized_attributes_rules = ctx.index.localized_attributes_rules(ctx.txn)?;
+            let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
+            let searchable_fields = ctx.index.searchable_fields_ids(ctx.txn)?;
+
+            let localized_fields = match &ctx.restricted_fids {
+                // if AttributeToSearchOn is set, use the restricted list of ids
+                Some(restricted_fids) => {
+                    let iter = restricted_fids
+                        .exact
+                        .iter()
+                        .chain(restricted_fids.tolerant.iter())
+                        .map(|(fid, _)| *fid);
+
+                    LocalizedFieldIds::new(&localized_attributes_rules, &fields_ids_map, iter)
+                }
+                // Otherwise use the full list of ids coming from the index searchable fields
+                None => LocalizedFieldIds::new(
+                    &localized_attributes_rules,
+                    &fields_ids_map,
+                    searchable_fields.into_iter(),
+                ),
+            };
+
+            db_locales = localized_fields.all_locales();
+            if !db_locales.is_empty() {
+                tokbuilder.allow_list(&db_locales);
+            }
+        }
+    };
 
     let tokenizer = tokbuilder.build();
     drop(entered);
@@ -6,6 +6,7 @@ pub mod exactness;
 pub mod geo_sort;
 pub mod integration;
 #[cfg(feature = "all-tokenizations")]
+#[cfg(not(feature = "chinese-pinyin"))]
 pub mod language;
 pub mod ngram_split_words;
 pub mod proximity;
@@ -12,6 +12,7 @@ use heed::BytesEncode;
 use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
 use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
 use crate::heed_codec::{BEU16StrCodec, StrRefCodec};
+use crate::localized_attributes_rules::LocalizedFieldIds;
 use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::update::index_documents::helpers::{
     merge_deladd_btreeset_string, merge_deladd_cbo_roaring_bitmaps,
@@ -28,6 +29,116 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
     docid_fid_facet_string: grenad::Reader<R>,
     indexer: GrenadParameters,
     settings_diff: &InnerIndexSettingsDiff,
+) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
+    if settings_diff.settings_update_only() {
+        extract_facet_string_docids_settings(docid_fid_facet_string, indexer, settings_diff)
+    } else {
+        let localized_field_ids = &settings_diff.new.localized_faceted_fields_ids;
+        extract_facet_string_docids_document_update(
+            docid_fid_facet_string,
+            indexer,
+            localized_field_ids,
+        )
+    }
+}
+
+/// Extracts the facet string and the documents ids where this facet string appear.
+///
+/// Returns a grenad reader with the list of extracted facet strings and
+/// documents ids from the given chunk of docid facet string positions.
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
+fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
+    docid_fid_facet_string: grenad::Reader<R>,
+    indexer: GrenadParameters,
+    localized_field_ids: &LocalizedFieldIds,
+) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
+    let max_memory = indexer.max_memory_by_thread();
+
+    let mut facet_string_docids_sorter = create_sorter(
+        grenad::SortAlgorithm::Stable,
+        merge_deladd_cbo_roaring_bitmaps,
+        indexer.chunk_compression_type,
+        indexer.chunk_compression_level,
+        indexer.max_nb_chunks,
+        max_memory.map(|m| m / 2),
+    );
+
+    let mut normalized_facet_string_docids_sorter = create_sorter(
+        grenad::SortAlgorithm::Stable,
+        merge_deladd_btreeset_string,
+        indexer.chunk_compression_type,
+        indexer.chunk_compression_level,
+        indexer.max_nb_chunks,
+        max_memory.map(|m| m / 2),
+    );
+
+    let mut buffer = Vec::new();
+    let mut cursor = docid_fid_facet_string.into_cursor()?;
+    while let Some((key, deladd_original_value_bytes)) = cursor.move_on_next()? {
+        let deladd_reader = KvReaderDelAdd::new(deladd_original_value_bytes);
+
+        let is_same_value = deladd_reader.get(DelAdd::Deletion).is_some()
+            && deladd_reader.get(DelAdd::Addition).is_some();
+
+        if is_same_value {
+            continue;
+        }
+
+        let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
+        let field_id = FieldId::from_be_bytes(field_id_bytes);
+
+        let (document_id_bytes, normalized_value_bytes) =
+            try_split_array_at::<_, 4>(bytes).unwrap();
+        let document_id = u32::from_be_bytes(document_id_bytes);
+
+        let normalized_value = str::from_utf8(normalized_value_bytes)?;
+
+        // Facet search normalization
+        {
+            let locales = localized_field_ids.locales(field_id);
+            let hyper_normalized_value = normalize_facet_string(normalized_value, locales);
+
+            let set = BTreeSet::from_iter(std::iter::once(normalized_value));
+
+            // as the facet string is the same, we can put the deletion and addition in the same obkv.
+            buffer.clear();
+            let mut obkv = KvWriterDelAdd::new(&mut buffer);
+            for (deladd_key, _) in deladd_reader.iter() {
+                let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
+                obkv.insert(deladd_key, val)?;
+            }
+            obkv.finish()?;
+
+            let key: (u16, &str) = (field_id, hyper_normalized_value.as_ref());
+            let key_bytes = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
+            normalized_facet_string_docids_sorter.insert(key_bytes, &buffer)?;
+        }
+
+        let key = FacetGroupKey { field_id, level: 0, left_bound: normalized_value };
+        let key_bytes = FacetGroupKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
+
+        buffer.clear();
+        let mut obkv = KvWriterDelAdd::new(&mut buffer);
+        for (deladd_key, _) in deladd_reader.iter() {
+            obkv.insert(deladd_key, document_id.to_ne_bytes())?;
+        }
+        obkv.finish()?;
+        facet_string_docids_sorter.insert(&key_bytes, &buffer)?;
+    }
+
+    let normalized = sorter_into_reader(normalized_facet_string_docids_sorter, indexer)?;
+    sorter_into_reader(facet_string_docids_sorter, indexer).map(|s| (s, normalized))
+}
+
+/// Extracts the facet string and the documents ids where this facet string appear.
+///
+/// Returns a grenad reader with the list of extracted facet strings and
+/// documents ids from the given chunk of docid facet string positions.
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
+fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
+    docid_fid_facet_string: grenad::Reader<R>,
+    indexer: GrenadParameters,
+    settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
     let max_memory = indexer.max_memory_by_thread();
 
@@ -60,6 +171,15 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
         let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
         let field_id = FieldId::from_be_bytes(field_id_bytes);
 
+        let old_locales = settings_diff.old.localized_faceted_fields_ids.locales(field_id);
+        let new_locales = settings_diff.new.localized_faceted_fields_ids.locales(field_id);
+
+        let are_same_locales = old_locales == new_locales;
+
+        if is_same_value && are_same_locales {
+            continue;
+        }
+
         let (document_id_bytes, normalized_value_bytes) =
             try_split_array_at::<_, 4>(bytes).unwrap();
         let document_id = u32::from_be_bytes(document_id_bytes);
@@ -68,15 +188,17 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
 
         // Facet search normalization
         {
-            let locales = settings_diff.old.localized_faceted_fields_ids.locales(field_id);
-            let old_hyper_normalized_value = normalize_facet_string(normalized_value, locales);
-            let locales = settings_diff.new.localized_faceted_fields_ids.locales(field_id);
-            let new_hyper_normalized_value = normalize_facet_string(normalized_value, locales);
+            let old_hyper_normalized_value = normalize_facet_string(normalized_value, old_locales);
+            let new_hyper_normalized_value = if are_same_locales {
+                &old_hyper_normalized_value
+            } else {
+                &normalize_facet_string(normalized_value, new_locales)
+            };
 
             let set = BTreeSet::from_iter(std::iter::once(normalized_value));
 
             // if the facet string is the same, we can put the deletion and addition in the same obkv.
-            if old_hyper_normalized_value == new_hyper_normalized_value {
+            if old_hyper_normalized_value == new_hyper_normalized_value.as_str() {
                 // nothing to do if we delete and re-add the value.
                 if is_same_value {
                     continue;
@@ -148,12 +270,21 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
 
 /// Normalizes the facet string and truncates it to the max length.
 fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String {
-    let options = NormalizerOption { lossy: true, ..Default::default() };
+    let options: NormalizerOption = NormalizerOption { lossy: true, ..Default::default() };
     let mut detection = StrDetection::new(facet_string, locales);
+
+    let script = detection.script();
+    // Detect the language of the facet string only if several locales are explicitly provided.
+    let language = match locales {
+        Some(&[language]) => Some(language),
+        Some(multiple_locales) if multiple_locales.len() > 1 => detection.language(),
+        _ => None,
+    };
+
     let token = Token {
         lemma: std::borrow::Cow::Borrowed(facet_string),
-        script: detection.script(),
-        language: detection.language(),
+        script,
+        language,
         ..Default::default()
     };
 
@@ -9,7 +9,7 @@ use std::result::Result as StdResult;
 use bytemuck::bytes_of;
 use grenad::Sorter;
 use heed::BytesEncode;
-use itertools::{merge_join_by, EitherOrBoth};
+use itertools::{merge_join_by, EitherOrBoth, Itertools};
 use ordered_float::OrderedFloat;
 use roaring::RoaringBitmap;
 use serde_json::{from_slice, Value};
@@ -317,11 +317,15 @@ fn deladd_obkv_cbo_roaring_bitmaps(
 }
 
 /// Truncates a string to the biggest valid LMDB key size.
-fn truncate_string(s: String) -> String {
-    s.char_indices()
-        .take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
-        .map(|(_, c)| c)
-        .collect()
+fn truncate_str(s: &str) -> &str {
+    let index = s
+        .char_indices()
+        .map(|(idx, _)| idx)
+        .chain(std::iter::once(s.len()))
+        .take_while(|idx| idx <= &MAX_FACET_VALUE_LENGTH)
+        .last();
+
+    &s[..index.unwrap_or(0)]
 }
 
 /// Computes the diff between both Del and Add numbers and
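A brief aside (illustrative, not from the diff): `truncate_str` only cuts at `char_indices()` boundaries, so a multi-byte UTF-8 character is never split and no reallocation happens. A standalone sketch of the same idea with a hard-coded limit instead of `MAX_FACET_VALUE_LENGTH`:

```rust
/// Illustrative only: keep at most `max_len` bytes of `s`, never splitting a
/// multi-byte UTF-8 character.
fn truncate_str(s: &str, max_len: usize) -> &str {
    let index = s
        .char_indices()
        .map(|(idx, _)| idx)             // every valid cut point...
        .chain(std::iter::once(s.len())) // ...plus the end of the string
        .take_while(|idx| *idx <= max_len)
        .last();
    &s[..index.unwrap_or(0)]
}

fn main() {
    // "é" is two bytes: cutting at 3 bytes must stop after the first "é".
    assert_eq!(truncate_str("ééé", 3), "é");
    assert_eq!(truncate_str("abc", 10), "abc");
}
```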
@@ -401,37 +405,103 @@ where
     del_strings.dedup();
     add_strings.dedup();
 
+    let del_strings = del_strings.iter().chunk_by(|(normalized, _)| normalized);
+    let add_strings = add_strings.iter().chunk_by(|(normalized, _)| normalized);
+
     let merged_strings_iter = itertools::merge_join_by(
         del_strings.into_iter().filter(|(n, _)| !n.is_empty()),
         add_strings.into_iter().filter(|(n, _)| !n.is_empty()),
-        |del, add| del.cmp(add),
+        |(normalized_del, _), (normalized_add, _)| normalized_del.cmp(normalized_add),
     );
 
     // insert normalized and original facet string in sorter
     for eob in merged_strings_iter {
         key_buffer.truncate(TRUNCATE_SIZE);
+        let (side, normalized, original) = match eob {
+            EitherOrBoth::Both((normalized, del), (_, add)) => {
+                let merged_strings_iter =
+                    itertools::merge_join_by(del, add, |(_, original_del), (_, original_add)| {
+                        original_del.cmp(original_add)
+                    });
+
+                // FIXME: we're in a bit of a pickle here, because we're only saving **one** original value per side,
+                // but we possibly have multiple original values that changed in the case where the field is an
+                // array of multiple values that normalize to the same value.
+                // (e.g. "foo" = ["bar", "Bar", "bAr", "baR"]. I'm not judging why you would do that ¯\_(ツ)_/¯)
+                //
+                // We'll work best effort by ignoring when the same value appears in both sides, deleting the first
+                // value that is only in the old version, and adding the first value that is only in the new version
+                let mut obkv = KvWriterDelAdd::memory();
+                let mut del = None;
+                let mut add = None;
+                let mut both = None;
+
+                for eob in merged_strings_iter {
                     match eob {
-            EitherOrBoth::Both(_, _) => (), // no need to touch anything
-            EitherOrBoth::Left((normalized, original)) => {
-                let truncated = truncate_string(normalized);
+                        EitherOrBoth::Both((_normalized, original), _) => {
+                            both = match both {
+                                Some(both) => Some(both),
+                                None => Some(original),
+                            }
+                        }
+                        EitherOrBoth::Left((_normalized, original)) => {
+                            del = match del {
+                                Some(del) => Some(del),
+                                None => Some(original),
+                            };
+                        }
+                        EitherOrBoth::Right((_normalized, original)) => {
+                            add = match add {
+                                Some(add) => Some(add),
+                                None => Some(original),
+                            }
+                        }
+                    }
+                }
+
+                if let Some(del) = del {
+                    obkv.insert(DelAdd::Deletion, del)?;
+                }
+                if let Some(add) = add
+                    // prefer the newly added, but if there is none, keep a value in the list of values
+                    // since the normalized value appears both in old and new, we should never remove it.
+                    .or(both)
+                {
+                    obkv.insert(DelAdd::Addition, add)?;
+                }
+
+                let truncated = truncate_str(normalized);
+                key_buffer.extend_from_slice(truncated.as_bytes());
+
+                let bytes = obkv.into_inner()?;
+                fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
+                continue;
+            }
+            EitherOrBoth::Left((_normalized, mut original)) => {
+                // FIXME: we only consider the first value for the purpose of facet search
+                // another structure is needed, able to retain all originals associated with a normalized value.
+                let Some((normalized, original)) = original.next() else {
+                    continue;
+                };
+                (DelAdd::Deletion, normalized, original)
+            }
+            EitherOrBoth::Right((_normalized, mut original)) => {
+                // FIXME: we only consider the first value for the purpose of facet search
+                // another structure is needed, able to retain all originals associated with a normalized value.
+                let Some((normalized, original)) = original.next() else {
+                    continue;
+                };
+                (DelAdd::Addition, normalized, original)
+            }
+        };
+        let truncated = truncate_str(normalized);
                 key_buffer.extend_from_slice(truncated.as_bytes());
 
                 let mut obkv = KvWriterDelAdd::memory();
-                obkv.insert(DelAdd::Deletion, original)?;
+        obkv.insert(side, original)?;
                 let bytes = obkv.into_inner()?;
                 fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
             }
-            EitherOrBoth::Right((normalized, original)) => {
-                let truncated = truncate_string(normalized);
-                key_buffer.extend_from_slice(truncated.as_bytes());
-
-                let mut obkv = KvWriterDelAdd::memory();
-                obkv.insert(DelAdd::Addition, original)?;
-                let bytes = obkv.into_inner()?;
-                fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
-            }
-        }
-    }
 
     Ok(())
 }
@@ -290,7 +290,7 @@ where
 
         match result? {
             DocumentEdition::Deleted(docid) => {
-                documents_to_remove.push(docid);
+                documents_to_remove.insert(docid);
             }
             DocumentEdition::Edited(new_document) => {
                 documents_batch_builder.append_json_object(&new_document)?;
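A short aside (illustrative, not from the diff): the switch from `push` to `insert` in the last hunk suggests `documents_to_remove` became a set rather than a `Vec`; in milli, docid sets are typically `RoaringBitmap`, where `insert` deduplicates and keeps the ids sorted. A minimal sketch with the `roaring` crate:

```rust
use roaring::RoaringBitmap;

fn main() {
    let mut documents_to_remove = RoaringBitmap::new();
    // Inserting the same docid twice keeps a single, sorted entry.
    documents_to_remove.insert(42);
    documents_to_remove.insert(7);
    documents_to_remove.insert(42);
    assert_eq!(documents_to_remove.len(), 2);
    assert_eq!(documents_to_remove.iter().collect::<Vec<u32>>(), vec![7, 42]);
}
```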
@@ -62,8 +62,18 @@ pub enum EmbedErrorKind {
     RestResponseDeserialization(std::io::Error),
     #[error("expected a response containing {0} embeddings, got only {1}")]
     RestResponseEmbeddingCount(usize, usize),
-    #[error("could not authenticate against embedding server{}", option_info(.0.as_deref(), "server replied with "))]
-    RestUnauthorized(Option<String>),
+    #[error("could not authenticate against {embedding} server{server_reply}{hint}", embedding=match *.1 {
+            ConfigurationSource::User => "embedding",
+            ConfigurationSource::OpenAi => "OpenAI",
+            ConfigurationSource::Ollama => "ollama"
+        },
+        server_reply=option_info(.0.as_deref(), "server replied with "),
+        hint=match *.1 {
+            ConfigurationSource::User => "\n - Hint: Check the `apiKey` parameter in the embedder configuration",
+            ConfigurationSource::OpenAi => "\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables",
+            ConfigurationSource::Ollama => "\n - Hint: Check the `apiKey` parameter in the embedder configuration"
+        })]
+    RestUnauthorized(Option<String>, ConfigurationSource),
     #[error("sent too many requests to embedding server{}", option_info(.0.as_deref(), "server replied with "))]
     RestTooManyRequests(Option<String>),
     #[error("sent a bad request to embedding server{}{}",
@@ -136,8 +146,14 @@ impl EmbedError {
         }
     }
 
-    pub(crate) fn rest_unauthorized(error_response: Option<String>) -> EmbedError {
-        Self { kind: EmbedErrorKind::RestUnauthorized(error_response), fault: FaultSource::User }
+    pub(crate) fn rest_unauthorized(
+        error_response: Option<String>,
+        configuration_source: ConfigurationSource,
+    ) -> EmbedError {
+        Self {
+            kind: EmbedErrorKind::RestUnauthorized(error_response, configuration_source),
+            fault: FaultSource::User,
+        }
     }
 
     pub(crate) fn rest_too_many_requests(error_response: Option<String>) -> EmbedError {
@@ -183,7 +183,7 @@ impl Embedder {
 
         let rest_embedder = RestEmbedder::new(
             RestEmbedderOptions {
-                api_key: Some(api_key.clone()),
+                api_key: (!api_key.is_empty()).then(|| api_key.clone()),
                 distribution: None,
                 dimensions: Some(options.dimensions()),
                 url,
@@ -275,7 +275,10 @@ fn check_response(
         Err(ureq::Error::Status(code, response)) => {
             let error_response: Option<String> = response.into_string().ok();
             Err(match code {
-                401 => Retry::give_up(EmbedError::rest_unauthorized(error_response)),
+                401 => Retry::give_up(EmbedError::rest_unauthorized(
+                    error_response,
+                    configuration_source,
+                )),
                 429 => Retry::rate_limited(EmbedError::rest_too_many_requests(error_response)),
                 400 => Retry::give_up(EmbedError::rest_bad_request(
                     error_response,