mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 14:04:31 +01:00
Merge #4715
4715: Build all arroy indexes that need to be built r=dureuill a=irevoire # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/4588 ## What does this PR do? - Update arroy - Ensure we always rebuild the arroy indexes that need to be built Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
298c7b0c93
24
Cargo.lock
generated
24
Cargo.lock
generated
@ -381,9 +381,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "arroy"
|
name = "arroy"
|
||||||
version = "0.3.1"
|
version = "0.4.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "73897699bf04bac935c0b120990d2a511e91e563e0f9769f9c8bb983d98dfbc9"
|
checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytemuck",
|
"bytemuck",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
@ -679,9 +679,9 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bytemuck"
|
name = "bytemuck"
|
||||||
version = "1.15.0"
|
version = "1.16.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15"
|
checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytemuck_derive",
|
"bytemuck_derive",
|
||||||
]
|
]
|
||||||
@ -2273,9 +2273,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heed"
|
name = "heed"
|
||||||
version = "0.20.1"
|
version = "0.20.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6f7acb9683d7c7068aa46d47557bfa4e35a277964b350d9504a87b03610163fd"
|
checksum = "f60d7cff16094be9627830b399c087a25017e93fb3768b87cd656a68ccb1ebe8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.5.0",
|
"bitflags 2.5.0",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
@ -3172,9 +3172,9 @@ checksum = "f9d642685b028806386b2b6e75685faadd3eb65a85fff7df711ce18446a422da"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lmdb-master-sys"
|
name = "lmdb-master-sys"
|
||||||
version = "0.2.0"
|
version = "0.2.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "dc9048db3a58c0732d7236abc4909058f9d2708cfb6d7d047eb895fddec6419a"
|
checksum = "a5142795c220effa4c8f4813537bd4c88113a07e45e93100ccb2adc5cec6c7f3"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cc",
|
"cc",
|
||||||
"doxygen-rs",
|
"doxygen-rs",
|
||||||
@ -5053,18 +5053,18 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "thiserror"
|
name = "thiserror"
|
||||||
version = "1.0.58"
|
version = "1.0.61"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297"
|
checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"thiserror-impl",
|
"thiserror-impl",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "thiserror-impl"
|
name = "thiserror-impl"
|
||||||
version = "1.0.58"
|
version = "1.0.61"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
|
checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
|
@ -40,7 +40,7 @@ ureq = "2.9.7"
|
|||||||
uuid = { version = "1.6.1", features = ["serde", "v4"] }
|
uuid = { version = "1.6.1", features = ["serde", "v4"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
arroy = "0.3.1"
|
arroy = "0.4.0"
|
||||||
big_s = "1.0.2"
|
big_s = "1.0.2"
|
||||||
crossbeam = "0.8.4"
|
crossbeam = "0.8.4"
|
||||||
insta = { version = "1.34.0", features = ["json", "redactions"] }
|
insta = { version = "1.34.0", features = ["json", "redactions"] }
|
||||||
|
@ -5396,7 +5396,7 @@ mod tests {
|
|||||||
let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy)
|
let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy)
|
||||||
.map(Some)
|
.map(Some)
|
||||||
.or_else(|e| match e {
|
.or_else(|e| match e {
|
||||||
arroy::Error::MissingMetadata => Ok(None),
|
arroy::Error::MissingMetadata(_) => Ok(None),
|
||||||
e => Err(e),
|
e => Err(e),
|
||||||
})
|
})
|
||||||
.transpose();
|
.transpose();
|
||||||
|
@ -65,7 +65,7 @@ impl Display for Value {
|
|||||||
write!(
|
write!(
|
||||||
f,
|
f,
|
||||||
"{}",
|
"{}",
|
||||||
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
|
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]", ".processingTimeMs" => "[duration]" })
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -213,11 +213,11 @@ async fn clear_documents() {
|
|||||||
|
|
||||||
// Make sure the arroy DB has been cleared
|
// Make sure the arroy DB has been cleared
|
||||||
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
|
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
|
||||||
snapshot!(json_string!(documents), @r###"
|
snapshot!(documents, @r###"
|
||||||
{
|
{
|
||||||
"hits": [],
|
"hits": [],
|
||||||
"query": "",
|
"query": "",
|
||||||
"processingTimeMs": 0,
|
"processingTimeMs": "[duration]",
|
||||||
"limit": 20,
|
"limit": 20,
|
||||||
"offset": 0,
|
"offset": 0,
|
||||||
"estimatedTotalHits": 0,
|
"estimatedTotalHits": 0,
|
||||||
@ -225,3 +225,85 @@ async fn clear_documents() {
|
|||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn add_remove_one_vector_4588() {
|
||||||
|
// https://github.com/meilisearch/meilisearch/issues/4588
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("doggo");
|
||||||
|
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(value, @r###"
|
||||||
|
{
|
||||||
|
"vectorStore": true,
|
||||||
|
"metrics": false,
|
||||||
|
"logsRoute": false
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
let task = server.wait_task(response.uid()).await;
|
||||||
|
snapshot!(task, name: "settings-processed");
|
||||||
|
|
||||||
|
let documents = json!([
|
||||||
|
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
|
||||||
|
]);
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
let task = index.wait_task(value.uid()).await;
|
||||||
|
snapshot!(task, name: "document-added");
|
||||||
|
|
||||||
|
let documents = json!([
|
||||||
|
{"id": 0, "name": "kefir", "_vectors": { "manual": null }},
|
||||||
|
]);
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
let task = index.wait_task(value.uid()).await;
|
||||||
|
snapshot!(task, name: "document-deleted");
|
||||||
|
|
||||||
|
let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await;
|
||||||
|
snapshot!(documents, @r###"
|
||||||
|
{
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"name": "kefir"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "",
|
||||||
|
"processingTimeMs": "[duration]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 1,
|
||||||
|
"semanticHitCount": 1
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let (documents, _code) = index
|
||||||
|
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||||
|
.await;
|
||||||
|
snapshot!(json_string!(documents), @r###"
|
||||||
|
{
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"name": "kefir",
|
||||||
|
"_vectors": {}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"offset": 0,
|
||||||
|
"limit": 20,
|
||||||
|
"total": 1
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
@ -0,0 +1,19 @@
|
|||||||
|
---
|
||||||
|
source: meilisearch/tests/vector/mod.rs
|
||||||
|
---
|
||||||
|
{
|
||||||
|
"uid": 1,
|
||||||
|
"indexUid": "doggo",
|
||||||
|
"status": "succeeded",
|
||||||
|
"type": "documentAdditionOrUpdate",
|
||||||
|
"canceledBy": null,
|
||||||
|
"details": {
|
||||||
|
"receivedDocuments": 1,
|
||||||
|
"indexedDocuments": 1
|
||||||
|
},
|
||||||
|
"error": null,
|
||||||
|
"duration": "[duration]",
|
||||||
|
"enqueuedAt": "[date]",
|
||||||
|
"startedAt": "[date]",
|
||||||
|
"finishedAt": "[date]"
|
||||||
|
}
|
@ -0,0 +1,19 @@
|
|||||||
|
---
|
||||||
|
source: meilisearch/tests/vector/mod.rs
|
||||||
|
---
|
||||||
|
{
|
||||||
|
"uid": 2,
|
||||||
|
"indexUid": "doggo",
|
||||||
|
"status": "succeeded",
|
||||||
|
"type": "documentAdditionOrUpdate",
|
||||||
|
"canceledBy": null,
|
||||||
|
"details": {
|
||||||
|
"receivedDocuments": 1,
|
||||||
|
"indexedDocuments": 1
|
||||||
|
},
|
||||||
|
"error": null,
|
||||||
|
"duration": "[duration]",
|
||||||
|
"enqueuedAt": "[date]",
|
||||||
|
"startedAt": "[date]",
|
||||||
|
"finishedAt": "[date]"
|
||||||
|
}
|
@ -0,0 +1,23 @@
|
|||||||
|
---
|
||||||
|
source: meilisearch/tests/vector/mod.rs
|
||||||
|
---
|
||||||
|
{
|
||||||
|
"uid": 0,
|
||||||
|
"indexUid": "doggo",
|
||||||
|
"status": "succeeded",
|
||||||
|
"type": "settingsUpdate",
|
||||||
|
"canceledBy": null,
|
||||||
|
"details": {
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"error": null,
|
||||||
|
"duration": "[duration]",
|
||||||
|
"enqueuedAt": "[date]",
|
||||||
|
"startedAt": "[date]",
|
||||||
|
"finishedAt": "[date]"
|
||||||
|
}
|
@ -79,7 +79,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
|
|||||||
] }
|
] }
|
||||||
tiktoken-rs = "0.5.8"
|
tiktoken-rs = "0.5.8"
|
||||||
liquid = "0.26.4"
|
liquid = "0.26.4"
|
||||||
arroy = "0.3.1"
|
arroy = "0.4.0"
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
tracing = "0.1.40"
|
tracing = "0.1.40"
|
||||||
ureq = { version = "2.9.7", features = ["json"] }
|
ureq = { version = "2.9.7", features = ["json"] }
|
||||||
|
@ -281,8 +281,9 @@ impl From<arroy::Error> for Error {
|
|||||||
arroy::Error::DatabaseFull
|
arroy::Error::DatabaseFull
|
||||||
| arroy::Error::InvalidItemAppend
|
| arroy::Error::InvalidItemAppend
|
||||||
| arroy::Error::UnmatchingDistance { .. }
|
| arroy::Error::UnmatchingDistance { .. }
|
||||||
| arroy::Error::MissingNode
|
| arroy::Error::NeedBuild(_)
|
||||||
| arroy::Error::MissingMetadata => {
|
| arroy::Error::MissingKey { .. }
|
||||||
|
| arroy::Error::MissingMetadata(_) => {
|
||||||
Error::InternalError(InternalError::ArroyError(value))
|
Error::InternalError(InternalError::ArroyError(value))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1610,7 +1610,7 @@ impl Index {
|
|||||||
arroy::Reader::open(rtxn, k, self.vector_arroy)
|
arroy::Reader::open(rtxn, k, self.vector_arroy)
|
||||||
.map(Some)
|
.map(Some)
|
||||||
.or_else(|e| match e {
|
.or_else(|e| match e {
|
||||||
arroy::Error::MissingMetadata => Ok(None),
|
arroy::Error::MissingMetadata(_) => Ok(None),
|
||||||
e => Err(e.into()),
|
e => Err(e.into()),
|
||||||
})
|
})
|
||||||
.transpose()
|
.transpose()
|
||||||
@ -1643,7 +1643,7 @@ impl Index {
|
|||||||
let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
|
let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
|
||||||
.map(Some)
|
.map(Some)
|
||||||
.or_else(|e| match e {
|
.or_else(|e| match e {
|
||||||
arroy::Error::MissingMetadata => Ok(None),
|
arroy::Error::MissingMetadata(_) => Ok(None),
|
||||||
e => Err(e),
|
e => Err(e),
|
||||||
})
|
})
|
||||||
.transpose();
|
.transpose();
|
||||||
|
@ -547,10 +547,11 @@ where
|
|||||||
pool.install(|| {
|
pool.install(|| {
|
||||||
for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
|
for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
|
||||||
let writer = arroy::Writer::new(vector_arroy, k, dimension);
|
let writer = arroy::Writer::new(vector_arroy, k, dimension);
|
||||||
if writer.is_empty(wtxn)? {
|
if writer.need_build(wtxn)? {
|
||||||
|
writer.build(wtxn, &mut rng, None)?;
|
||||||
|
} else if writer.is_empty(wtxn)? {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
writer.build(wtxn, &mut rng, None)?;
|
|
||||||
}
|
}
|
||||||
Result::Ok(())
|
Result::Ok(())
|
||||||
})
|
})
|
||||||
|
Loading…
x
Reference in New Issue
Block a user