From 2042229927dffcbc9fddb55168d8955842efcbfd Mon Sep 17 00:00:00 2001 From: curquiza Date: Thu, 19 Oct 2023 08:50:11 +0000 Subject: [PATCH 01/17] Update version for the next release (v1.4.2) in Cargo.toml --- Cargo.lock | 28 ++++++++++++++-------------- Cargo.toml | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b3991d130..aa7df19ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -468,7 +468,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] name = "benchmarks" -version = "1.4.1" +version = "1.4.2" dependencies = [ "anyhow", "bytes", @@ -1206,7 +1206,7 @@ dependencies = [ [[package]] name = "dump" -version = "1.4.1" +version = "1.4.2" dependencies = [ "anyhow", "big_s", @@ -1417,7 +1417,7 @@ dependencies = [ [[package]] name = "file-store" -version = "1.4.1" +version = "1.4.2" dependencies = [ "faux", "tempfile", @@ -1439,7 +1439,7 @@ dependencies = [ [[package]] name = "filter-parser" -version = "1.4.1" +version = "1.4.2" dependencies = [ "insta", "nom", @@ -1459,7 +1459,7 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "1.4.1" +version = "1.4.2" dependencies = [ "criterion", "serde_json", @@ -1577,7 +1577,7 @@ dependencies = [ [[package]] name = "fuzzers" -version = "1.4.1" +version = "1.4.2" dependencies = [ "arbitrary", "clap", @@ -1891,7 +1891,7 @@ dependencies = [ [[package]] name = "index-scheduler" -version = "1.4.1" +version = "1.4.2" dependencies = [ "anyhow", "big_s", @@ -2088,7 +2088,7 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "1.4.1" +version = "1.4.2" dependencies = [ "criterion", "serde_json", @@ -2500,7 +2500,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "meili-snap" -version = "1.4.1" +version = "1.4.2" dependencies = [ "insta", "md5", @@ -2509,7 +2509,7 @@ dependencies = [ [[package]] name = "meilisearch" -version = "1.4.1" +version = "1.4.2" dependencies = [ "actix-cors", "actix-http", @@ -2599,7 +2599,7 @@ dependencies = [ [[package]] name = "meilisearch-auth" -version = "1.4.1" +version = "1.4.2" dependencies = [ "base64 0.21.2", "enum-iterator", @@ -2618,7 +2618,7 @@ dependencies = [ [[package]] name = "meilisearch-types" -version = "1.4.1" +version = "1.4.2" dependencies = [ "actix-web", "anyhow", @@ -2672,7 +2672,7 @@ dependencies = [ [[package]] name = "milli" -version = "1.4.1" +version = "1.4.2" dependencies = [ "big_s", "bimap", @@ -2994,7 +2994,7 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "permissive-json-pointer" -version = "1.4.1" +version = "1.4.2" dependencies = [ "big_s", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 05c7b1012..a40af10f7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ members = [ ] [workspace.package] -version = "1.4.1" +version = "1.4.2" authors = ["Quentin de Quelen ", "Clément Renault "] description = "Meilisearch HTTP server" homepage = "https://meilisearch.com" From 5fe7c4545a57d7356d25668e7ecbdcf1616082b2 Mon Sep 17 00:00:00 2001 From: Vivek Kumar Date: Thu, 19 Oct 2023 16:48:45 +0530 Subject: [PATCH 02/17] compute all candidates correctly when skipping --- milli/src/search/new/bucket_sort.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index cf2f08cce..df9c14c7d 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -46,9 +46,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( if let Some(distinct_fid) = distinct_fid { let mut excluded = RoaringBitmap::new(); let mut results = vec![]; - let mut skip = 0; for docid in universe.iter() { - if results.len() >= length { + if results.len() >= from + length { break; } if excluded.contains(docid) { @@ -56,16 +55,16 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( } distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?; - skip += 1; - if skip <= from { - continue; - } - results.push(docid); } let mut all_candidates = universe - excluded; all_candidates.extend(results.iter().copied()); + if results.len() >= from { + results.drain(..from); + } else { + results.clear(); + } return Ok(BucketSortOutput { scores: vec![Default::default(); results.len()], From 32c78ac8b1ccccf2a41639c159c11ff3eee46a00 Mon Sep 17 00:00:00 2001 From: Vivek Kumar Date: Thu, 19 Oct 2023 16:50:14 +0530 Subject: [PATCH 03/17] add/update tests when search with distinct attribute & pagination with no ranking --- meilisearch/tests/search/distinct.rs | 228 ++++++++++++++++++++++++--- milli/tests/search/distinct.rs | 8 +- 2 files changed, 207 insertions(+), 29 deletions(-) diff --git a/meilisearch/tests/search/distinct.rs b/meilisearch/tests/search/distinct.rs index 93c5197a6..14ce88da2 100644 --- a/meilisearch/tests/search/distinct.rs +++ b/meilisearch/tests/search/distinct.rs @@ -6,21 +6,109 @@ use crate::json; pub(self) static DOCUMENTS: Lazy = Lazy::new(|| { json!([ - {"productId": 1, "shopId": 1}, - {"productId": 2, "shopId": 1}, - {"productId": 3, "shopId": 2}, - {"productId": 4, "shopId": 2}, - {"productId": 5, "shopId": 3}, - {"productId": 6, "shopId": 3}, - {"productId": 7, "shopId": 4}, - {"productId": 8, "shopId": 4}, - {"productId": 9, "shopId": 5}, - {"productId": 10, "shopId": 5} + { + "id": 1, + "description": "Leather Jacket", + "brand": "Lee Jeans", + "product_id": "123456", + "color": "Brown" + }, + { + "id": 2, + "description": "Leather Jacket", + "brand": "Lee Jeans", + "product_id": "123456", + "color": "Black" + }, + { + "id": 3, + "description": "Leather Jacket", + "brand": "Lee Jeans", + "product_id": "123456", + "color": "Blue" + }, + { + "id": 4, + "description": "T-Shirt", + "brand": "Nike", + "product_id": "789012", + "color": "Red" + }, + { + "id": 5, + "description": "T-Shirt", + "brand": "Nike", + "product_id": "789012", + "color": "Blue" + }, + { + "id": 6, + "description": "Running Shoes", + "brand": "Adidas", + "product_id": "456789", + "color": "Black" + }, + { + "id": 7, + "description": "Running Shoes", + "brand": "Adidas", + "product_id": "456789", + "color": "White" + }, + { + "id": 8, + "description": "Hoodie", + "brand": "Puma", + "product_id": "987654", + "color": "Gray" + }, + { + "id": 9, + "description": "Sweater", + "brand": "Gap", + "product_id": "234567", + "color": "Green" + }, + { + "id": 10, + "description": "Sweater", + "brand": "Gap", + "product_id": "234567", + "color": "Red" + }, + { + "id": 11, + "description": "Sweater", + "brand": "Gap", + "product_id": "234567", + "color": "Blue" + }, + { + "id": 12, + "description": "Jeans", + "brand": "Levi's", + "product_id": "345678", + "color": "Indigo" + }, + { + "id": 13, + "description": "Jeans", + "brand": "Levi's", + "product_id": "345678", + "color": "Black" + }, + { + "id": 14, + "description": "Jeans", + "brand": "Levi's", + "product_id": "345678", + "color": "Stone Wash" + } ]) }); -pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId"; -pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId"; +pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id"; +pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id"; /// testing: https://github.com/meilisearch/meilisearch/issues/4078 #[actix_rt::test] @@ -33,31 +121,121 @@ async fn distinct_search_with_offset_no_ranking() { index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await; index.wait_task(1).await; - fn get_hits(Value(response): Value) -> Vec { + fn get_hits(response: &Value) -> Vec<&str> { let hits_array = response["hits"].as_array().unwrap(); - hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::>() + hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::>() } - let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await; - let hits = get_hits(response); + let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await; + let hits = get_hits(&response); snapshot!(code, @"200 OK"); snapshot!(hits.len(), @"2"); - snapshot!(format!("{:?}", hits), @"[1, 2]"); + snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#); + snapshot!(response["estimatedTotalHits"] , @"11"); - let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await; - let hits = get_hits(response); + let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await; + let hits = get_hits(&response); snapshot!(code, @"200 OK"); snapshot!(hits.len(), @"2"); - snapshot!(format!("{:?}", hits), @"[3, 4]"); + snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#); + snapshot!(response["estimatedTotalHits"], @"10"); - let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await; - let hits = get_hits(response); + let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"2"); + snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#); + snapshot!(response["estimatedTotalHits"], @"6"); + + let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await; + let hits = get_hits(&response); snapshot!(code, @"200 OK"); snapshot!(hits.len(), @"1"); - snapshot!(format!("{:?}", hits), @"[5]"); + snapshot!(format!("{:?}", hits), @r#"["345678"]"#); + snapshot!(response["estimatedTotalHits"], @"6"); - let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await; - let hits = get_hits(response); + let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await; + let hits = get_hits(&response); snapshot!(code, @"200 OK"); snapshot!(hits.len(), @"0"); + snapshot!(format!("{:?}", hits), @r#"[]"#); + snapshot!(response["estimatedTotalHits"], @"6"); + + let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"0"); + snapshot!(format!("{:?}", hits), @r#"[]"#); + snapshot!(response["estimatedTotalHits"], @"6"); +} + +/// testing: https://github.com/meilisearch/meilisearch/issues/4130 +#[actix_rt::test] +async fn distinct_search_with_pagination_no_ranking() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await; + index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await; + index.wait_task(1).await; + + fn get_hits(response: &Value) -> Vec<&str> { + let hits_array = response["hits"].as_array().unwrap(); + hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::>() + } + + let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"0"); + snapshot!(format!("{:?}", hits), @r#"[]"#); + snapshot!(response["page"], @"0"); + snapshot!(response["totalPages"], @"3"); + snapshot!(response["totalHits"], @"6"); + + let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"2"); + snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#); + snapshot!(response["page"], @"1"); + snapshot!(response["totalPages"], @"3"); + snapshot!(response["totalHits"], @"6"); + + let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"2"); + snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#); + snapshot!(response["page"], @"2"); + snapshot!(response["totalPages"], @"3"); + snapshot!(response["totalHits"], @"6"); + + let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"2"); + snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#); + snapshot!(response["page"], @"3"); + snapshot!(response["totalPages"], @"3"); + snapshot!(response["totalHits"], @"6"); + + let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"0"); + snapshot!(format!("{:?}", hits), @r#"[]"#); + snapshot!(response["page"], @"4"); + snapshot!(response["totalPages"], @"3"); + snapshot!(response["totalHits"], @"6"); + + let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"3"); + snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#); + snapshot!(response["page"], @"2"); + snapshot!(response["totalPages"], @"2"); + snapshot!(response["totalHits"], @"6"); } diff --git a/milli/tests/search/distinct.rs b/milli/tests/search/distinct.rs index e1876286c..fc890dfe8 100644 --- a/milli/tests/search/distinct.rs +++ b/milli/tests/search/distinct.rs @@ -202,7 +202,7 @@ test_distinct!( EXTERNAL_DOCUMENTS_IDS.len(), 1, vec![], - 2 + 3 ); test_distinct!( // testing: https://github.com/meilisearch/meilisearch/issues/4078 @@ -212,7 +212,7 @@ test_distinct!( 1, 2, vec![], - 1 + 3 ); test_distinct!( // testing: https://github.com/meilisearch/meilisearch/issues/4078 @@ -222,7 +222,7 @@ test_distinct!( EXTERNAL_DOCUMENTS_IDS.len(), 2, vec![], - 5 + 7 ); test_distinct!( // testing: https://github.com/meilisearch/meilisearch/issues/4078 @@ -232,5 +232,5 @@ test_distinct!( 2, 4, vec![], - 3 + 7 ); From 2bae9550c8f1f5874c4e1bab5c3fe767dce416ec Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 19 Oct 2023 15:58:25 +0200 Subject: [PATCH 04/17] Add explanatory comment --- milli/src/search/new/bucket_sort.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index df9c14c7d..b46f6124f 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -60,6 +60,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( let mut all_candidates = universe - excluded; all_candidates.extend(results.iter().copied()); + // drain the results of the skipped elements + // this **must** be done **after** writing the entire results in `all_candidates` to ensure + // e.g. estimatedTotalHits is correct. if results.len() >= from { results.drain(..from); } else { From ee6f79d60b3c6d2dd9ebe1cbda4850b1a8aed9ff Mon Sep 17 00:00:00 2001 From: curquiza Date: Mon, 23 Oct 2023 11:49:07 +0000 Subject: [PATCH 05/17] Update version for the next release (v1.5.0) in Cargo.toml --- Cargo.lock | 28 ++++++++++++++-------------- Cargo.toml | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aa7df19ed..d41708399 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -468,7 +468,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] name = "benchmarks" -version = "1.4.2" +version = "1.5.0" dependencies = [ "anyhow", "bytes", @@ -1206,7 +1206,7 @@ dependencies = [ [[package]] name = "dump" -version = "1.4.2" +version = "1.5.0" dependencies = [ "anyhow", "big_s", @@ -1417,7 +1417,7 @@ dependencies = [ [[package]] name = "file-store" -version = "1.4.2" +version = "1.5.0" dependencies = [ "faux", "tempfile", @@ -1439,7 +1439,7 @@ dependencies = [ [[package]] name = "filter-parser" -version = "1.4.2" +version = "1.5.0" dependencies = [ "insta", "nom", @@ -1459,7 +1459,7 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "1.4.2" +version = "1.5.0" dependencies = [ "criterion", "serde_json", @@ -1577,7 +1577,7 @@ dependencies = [ [[package]] name = "fuzzers" -version = "1.4.2" +version = "1.5.0" dependencies = [ "arbitrary", "clap", @@ -1891,7 +1891,7 @@ dependencies = [ [[package]] name = "index-scheduler" -version = "1.4.2" +version = "1.5.0" dependencies = [ "anyhow", "big_s", @@ -2088,7 +2088,7 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "1.4.2" +version = "1.5.0" dependencies = [ "criterion", "serde_json", @@ -2500,7 +2500,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "meili-snap" -version = "1.4.2" +version = "1.5.0" dependencies = [ "insta", "md5", @@ -2509,7 +2509,7 @@ dependencies = [ [[package]] name = "meilisearch" -version = "1.4.2" +version = "1.5.0" dependencies = [ "actix-cors", "actix-http", @@ -2599,7 +2599,7 @@ dependencies = [ [[package]] name = "meilisearch-auth" -version = "1.4.2" +version = "1.5.0" dependencies = [ "base64 0.21.2", "enum-iterator", @@ -2618,7 +2618,7 @@ dependencies = [ [[package]] name = "meilisearch-types" -version = "1.4.2" +version = "1.5.0" dependencies = [ "actix-web", "anyhow", @@ -2672,7 +2672,7 @@ dependencies = [ [[package]] name = "milli" -version = "1.4.2" +version = "1.5.0" dependencies = [ "big_s", "bimap", @@ -2994,7 +2994,7 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "permissive-json-pointer" -version = "1.4.2" +version = "1.5.0" dependencies = [ "big_s", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index a40af10f7..cc16d50db 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ members = [ ] [workspace.package] -version = "1.4.2" +version = "1.5.0" authors = ["Quentin de Quelen ", "Clément Renault "] description = "Meilisearch HTTP server" homepage = "https://meilisearch.com" From 4c6fddb1cb2c5ca3b2931135d3e0b955a504247d Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 26 Oct 2023 17:01:10 +0200 Subject: [PATCH 06/17] update charabia --- Cargo.lock | 782 +++++++++++++++++++++++++++++++++-- meilisearch-types/Cargo.toml | 3 +- meilisearch/Cargo.toml | 1 + milli/Cargo.toml | 7 +- 4 files changed, 761 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d41708399..bee967723 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -558,7 +558,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05" dependencies = [ "memchr", - "regex-automata", + "regex-automata 0.3.6", "serde", ] @@ -646,6 +646,16 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "calendrical_calculations" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8dfe3bc6a50b4667fafdb6d9cf26731c5418c457e317d8166c972014facf9a5d" +dependencies = [ + "core_maths", + "displaydoc", +] + [[package]] name = "cargo_toml" version = "0.15.3" @@ -699,9 +709,9 @@ dependencies = [ [[package]] name = "charabia" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "098219a776307414866165a03a9cc68c1578764fe3616fe979e1c280790ddd73" +checksum = "ffb924701d850fbf0331302e7f9715c04e494b4b9bebb38ac48bdd30924e1936" dependencies = [ "aho-corasick", "cow-utils", @@ -709,11 +719,15 @@ dependencies = [ "deunicode", "either", "fst", + "icu", + "icu_provider", + "icu_provider_blob", "irg-kvariants", "jieba-rs", "lindera-core", "lindera-dictionary", "lindera-tokenizer", + "litemap 0.6.1", "once_cell", "pinyin", "serde", @@ -721,6 +735,7 @@ dependencies = [ "unicode-normalization", "wana_kana", "whatlang", + "zerovec 0.9.6", ] [[package]] @@ -801,6 +816,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" +[[package]] +name = "cobs" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" + [[package]] name = "colorchoice" version = "1.0.0" @@ -874,6 +895,15 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +[[package]] +name = "core_maths" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3b02505ccb8c50b0aa21ace0fc08c3e53adebd4e58caa18a36152803c7709a3" +dependencies = [ + "libm", +] + [[package]] name = "cow-utils" version = "0.1.2" @@ -1073,6 +1103,15 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "deduplicating_array" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a636096586ca093a10ac0175bfb384d024089dca0dae54e3e69bc1c1596358e8" +dependencies = [ + "serde", +] + [[package]] name = "deranged" version = "0.3.7" @@ -1204,6 +1243,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "displaydoc" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", +] + [[package]] name = "dump" version = "1.5.0" @@ -1238,6 +1288,12 @@ dependencies = [ "serde", ] +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + [[package]] name = "encode_unicode" version = "0.3.6" @@ -1447,6 +1503,17 @@ dependencies = [ "unescaper", ] +[[package]] +name = "fixed_decimal" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5287d527037d0f35c8801880361eb38bb9bce194805350052c2a79538388faeb" +dependencies = [ + "displaydoc", + "smallvec", + "writeable", +] + [[package]] name = "flate2" version = "1.0.26" @@ -1873,6 +1940,487 @@ dependencies = [ "tokio-rustls 0.24.1", ] +[[package]] +name = "icu" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30f75f394ebee8d539bef8f6f02ad7b5f41c33de74c9eae1a50337b382a5aab1" +dependencies = [ + "icu_calendar", + "icu_casemap", + "icu_collator", + "icu_collections", + "icu_compactdecimal", + "icu_datetime", + "icu_decimal", + "icu_displaynames", + "icu_list", + "icu_locid", + "icu_locid_transform", + "icu_normalizer", + "icu_plurals", + "icu_properties", + "icu_provider", + "icu_relativetime", + "icu_segmenter", + "icu_timezone", + "icu_transliterate", +] + +[[package]] +name = "icu_calendar" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b520c5675775e3838447c33fc55bf558148c6824ef0d20ff7a9e0df7345a281c" +dependencies = [ + "calendrical_calculations", + "displaydoc", + "icu_calendar_data", + "icu_locid", + "icu_locid_transform", + "icu_provider", + "serde", + "tinystr", + "writeable", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_calendar_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75d8d1a514ca7e6dc547be930f2fd661d578909c07cf1c1adade81c3f7a78840" + +[[package]] +name = "icu_casemap" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "976068d7759293cbd9daa0d1669618bb9094c7ee54e546cd8b877dd4fe59007a" +dependencies = [ + "displaydoc", + "icu_casemap_data", + "icu_collections", + "icu_locid", + "icu_properties", + "icu_provider", + "serde", + "writeable", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_casemap_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1251070c14d5b94cd00f97025e9cedce6a6eeb39485e2a226c58432cc4f72ffd" + +[[package]] +name = "icu_collator" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be493c81154545a00fc5196e814cae0e1470bc696d518b5df877049aa6bcefe1" +dependencies = [ + "displaydoc", + "icu_collator_data", + "icu_collections", + "icu_locid", + "icu_locid_transform", + "icu_normalizer", + "icu_properties", + "icu_provider", + "serde", + "smallvec", + "utf16_iter", + "utf8_iter", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_collator_data" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dbe9abe5ce570ad4707026f37bc21ef95c36b945c3c4564b9aa4e2e1c043126" + +[[package]] +name = "icu_collections" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3907b2246e8dd5a29ead8a965e7c0c8a90e9b928e614a4279257d45c5e553e91" +dependencies = [ + "displaydoc", + "serde", + "yoke", + "zerofrom", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_compactdecimal" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a8bb9143e7681fd5f5877c76f7b6365e173545d00d0e12ef23ba1888a996baa" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_compactdecimal_data", + "icu_decimal", + "icu_locid_transform", + "icu_plurals", + "icu_provider", + "writeable", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_compactdecimal_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2e9b7585f26db531ea5aaedaa68cb66cd2be37fe698b33a289849ff3129545b" + +[[package]] +name = "icu_datetime" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f5bf2e6dd961b59ee5935070220915db6cf0ab5137de362964f800c2b7d14fa" +dependencies = [ + "displaydoc", + "either", + "fixed_decimal", + "icu_calendar", + "icu_datetime_data", + "icu_decimal", + "icu_locid", + "icu_locid_transform", + "icu_plurals", + "icu_provider", + "icu_timezone", + "litemap 0.7.1", + "serde", + "smallvec", + "tinystr", + "writeable", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_datetime_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "078b2ed516a2f5054ee7f55b1fe970b92e90ae4cace8a0fe1e5f9fc2e94be609" + +[[package]] +name = "icu_decimal" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1986a0b7df834aaddb911b4593c990950ac5606fc83ce9aad4311be80f51e81a" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_decimal_data", + "icu_locid", + "icu_locid_transform", + "icu_provider", + "serde", + "writeable", +] + +[[package]] +name = "icu_decimal_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c064b3828953151f8c610bfff6fec776f958641249ebfd1cf36f073f0654e77" + +[[package]] +name = "icu_displaynames" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c98329d348e918ac7e88e6d6613a46bef09ca8a65db4ddf70d86e6eaac0e2ec3" +dependencies = [ + "icu_displaynames_data", + "icu_locid", + "icu_locid_transform", + "icu_provider", + "serde", + "tinystr", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_displaynames_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60f9f56c427f1e80383667e8fb13c07707f6561839283115617cc67307a5d020" + +[[package]] +name = "icu_list" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1a44bbed77a7e7b555f9d7dd4b43f75ec1402b438a901d20451943d50cbd90" +dependencies = [ + "deduplicating_array", + "displaydoc", + "icu_list_data", + "icu_locid_transform", + "icu_provider", + "regex-automata 0.2.0", + "serde", + "writeable", +] + +[[package]] +name = "icu_list_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3237583f0cb7feafabb567c4492fe9ef1d2d4113f6a8798a923273ea5de996d" + +[[package]] +name = "icu_locid" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f284eb342dc49d3e9d9f3b188489d76b5d22dfb1d1a5e0d1941811253bac625c" +dependencies = [ + "displaydoc", + "litemap 0.7.1", + "serde", + "tinystr", + "writeable", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_locid_transform" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6551daf80882d8e68eee186cc19e132d8bde1b1f059a79b93384a5ca0e8fc5e7" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "serde", + "tinystr", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a741eba5431f75eb2f1f9022d3cffabcadda6771e54fb4e77c8ba8653e4da44" + +[[package]] +name = "icu_normalizer" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "080fc33a720d50a7342b0c58df010fbcfb842d6f78ef81555f8b1ac6bba57d3c" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "serde", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d22f74066c2e6442db2a9aa14950278e86719e811e304e48bae03094b369d" + +[[package]] +name = "icu_plurals" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20556516b8be2b2f5dc3d6b23884b65c5c59ed8be0b44c419e4808c9b0792fce" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_locid", + "icu_locid_transform", + "icu_plurals_data", + "icu_provider", + "serde", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_plurals_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc552215224997aaaa4e05d95981386d3c52042acebfcc732137d5d9be96a21" + +[[package]] +name = "icu_properties" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3477ae70f8ca8dc08ff7574b5398ed0a2f2e4e6b66bdff2558a92ed67e262be1" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "serde", + "tinystr", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_properties_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c8bb3b67a8347e94d580434369e5c7ee89999b9309d04b7cfc88dfaa0f31b59" + +[[package]] +name = "icu_provider" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68acdef80034b5e35d8524e9817479d389a4f9774f3f0cbe1bf3884d80fd5934" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "postcard", + "serde", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_provider_blob" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31326d28c7f95a964a4f0ee86c24002da5f6db907e3bcb079949b4ff103b6a9" +dependencies = [ + "icu_provider", + "postcard", + "serde", + "writeable", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_provider_macros" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2060258edfcfe32ca7058849bf0f146cb5c59aadbedf480333c0d0002f97bc99" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", +] + +[[package]] +name = "icu_relativetime" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4e6c1b531ab35f5b0cb552d3fb8dab1cb49f98e68e12bdc2169ca15e805207c" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_decimal", + "icu_locid_transform", + "icu_plurals", + "icu_provider", + "icu_relativetime_data", + "serde", + "writeable", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_relativetime_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71ec2ca0aff8c6865075c6257bc91d21a77acb6465635306a280af89208bed24" + +[[package]] +name = "icu_segmenter" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcb3c1981ce2187a745f391a741cb14e77453325acb3b2e014b05da51c0a39f2" +dependencies = [ + "core_maths", + "displaydoc", + "icu_collections", + "icu_locid", + "icu_provider", + "icu_segmenter_data", + "serde", + "utf8_iter", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_segmenter_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9703f6713044d1c0a1335a6d78ffece4c9380582416ace6feeb608e84d279fc7" + +[[package]] +name = "icu_timezone" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e6401cd210ccda98b2e7fc707831b29c6efe319efbbec460f957b6f331f626" +dependencies = [ + "displaydoc", + "icu_calendar", + "icu_locid", + "icu_provider", + "icu_timezone_data", + "serde", + "tinystr", + "zerotrie", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_timezone_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d7e214a653bac59b768c42f82d252f13af95e8a9cb07b6108b8bc723c561b43" + +[[package]] +name = "icu_transliterate" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4bdf006774b5a5898d97af6c95b148d34cd5c87cbed00610ff873e5b5885e28" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid", + "icu_normalizer", + "icu_properties", + "icu_provider", + "icu_unicodeset_parse", + "litemap 0.7.1", + "serde", + "zerovec 0.10.0", +] + +[[package]] +name = "icu_unicodeset_parse" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2c3c1ab072cb9ec2dfb377ed7be07bf1bdce055b8324ba6392323f588c38c5a" +dependencies = [ + "icu_collections", + "icu_properties", + "icu_provider", + "tinystr", + "zerovec 0.10.0", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -2177,9 +2725,9 @@ dependencies = [ [[package]] name = "lindera-cc-cedict-builder" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d2e8f2ca97ddf952fe340642511b9c14b373cb2eef711d526bb8ef2ca0969b8" +checksum = "6f567a47e47b5420908424de2c6c5e424e3cafe588d0146bd128c0f3755758a3" dependencies = [ "anyhow", "bincode", @@ -2196,9 +2744,9 @@ dependencies = [ [[package]] name = "lindera-compress" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f72b460559bcbe8a9cee85ea4a5056133ed3abf373031191589236e656d65b59" +checksum = "49f3e553d55ebe9881fa5e5de588b0a153456e93564d17dfbef498912caf63a2" dependencies = [ "anyhow", "flate2", @@ -2207,9 +2755,9 @@ dependencies = [ [[package]] name = "lindera-core" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f586eb8a9393c32d5525e0e9336a3727bd1329674740097126f3b0bff8a1a1ea" +checksum = "a9a2440cc156a4a911a174ec68203543d1efb10df3a700a59b6bf581e453c726" dependencies = [ "anyhow", "bincode", @@ -2224,9 +2772,9 @@ dependencies = [ [[package]] name = "lindera-decompress" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb1facd8da698072fcc7338bd757730db53d59f313f44dd583fa03681dcc0e1" +checksum = "e077a410e61c962cb526f71b7effd62ffc607488a8f61869c937582d2ccb529b" dependencies = [ "anyhow", "flate2", @@ -2235,9 +2783,9 @@ dependencies = [ [[package]] name = "lindera-dictionary" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec7be7410b1da7017a8948986b87af67082f605e9a716f0989790d795d677f0c" +checksum = "d9f57491adf7b311a3ee87f5e4a36454df16a2ec73de4ef28b2106fac80bd782" dependencies = [ "anyhow", "bincode", @@ -2255,9 +2803,9 @@ dependencies = [ [[package]] name = "lindera-ipadic-builder" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "705d07f8a45d04fd95149f7ad41a26d1f9e56c9c00402be6f9dd05e3d88b99c6" +checksum = "a3476ec7748aebd2eb23d496ddfce5e7e0a5c031cffcd214451043e02d029f11" dependencies = [ "anyhow", "bincode", @@ -2276,9 +2824,9 @@ dependencies = [ [[package]] name = "lindera-ipadic-neologd-builder" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "633a93983ba13fba42328311a501091bd4a7aff0c94ae9eaa9d4733dd2b0468a" +checksum = "7b1c7576a02d5e4af2bf62de51790a01bc4b8bc0d0b6a6b86a46b157f5cb306d" dependencies = [ "anyhow", "bincode", @@ -2297,9 +2845,9 @@ dependencies = [ [[package]] name = "lindera-ko-dic" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a428e0d316b6c86f51bd919479692bc41ad840dba266ebc044663970f431ea18" +checksum = "b713ecd5b827d7d448c3c5eb3c6d5899ecaf22cd17087599996349a02c76828d" dependencies = [ "bincode", "byteorder", @@ -2314,9 +2862,9 @@ dependencies = [ [[package]] name = "lindera-ko-dic-builder" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a5288704c6b8a069c0a1705c38758e836497698b50453373ab3d56c6f9a7ef8" +checksum = "3e545752f6487be87b572529ad594cb3b48d2ef20821516f598b2d152d23277b" dependencies = [ "anyhow", "bincode", @@ -2334,9 +2882,9 @@ dependencies = [ [[package]] name = "lindera-tokenizer" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "106ba439b2e87529d9bbedbb88d69f635baba1195c26502b308f55a85885fc81" +checksum = "24a2d4606a5a4da62ac4a3680ee884a75da7f0c892dc967fc9cb983ceba39a8f" dependencies = [ "bincode", "byteorder", @@ -2349,9 +2897,9 @@ dependencies = [ [[package]] name = "lindera-unidic" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3399b6dcfe1701333451d184ff3c677f433b320153427b146360c9e4bd8cb816" +checksum = "388b1bdf81794b5d5b8057ce0321c58ff4b90d676b637948ccc7863ae2f43d28" dependencies = [ "bincode", "byteorder", @@ -2366,9 +2914,9 @@ dependencies = [ [[package]] name = "lindera-unidic-builder" -version = "0.27.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b698227fdaeac32289173ab389b990d4eb00a40cbc9912020f69a0c491dabf55" +checksum = "cdfa3e29a22c047da57fadd960ff674b720de15a1e2fb35b5ed67f3408afb469" dependencies = [ "anyhow", "bincode", @@ -2402,6 +2950,21 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" +[[package]] +name = "litemap" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "575d8a551c59104b4df91269921e5eab561aa1b77c618dac0414b5d44a4617de" + +[[package]] +name = "litemap" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a1a2647d5b7134127971a6de0d533c49de2159167e7f259c427195f87168a1" +dependencies = [ + "serde", +] + [[package]] name = "lmdb-rkv-sys" version = "0.15.1" @@ -3143,6 +3706,17 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "postcard" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a55c51ee6c0db07e68448e336cf8ea4131a620edefebf9893e759b2d793420f8" +dependencies = [ + "cobs", + "embedded-io", + "serde", +] + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -3343,10 +3917,19 @@ checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a" dependencies = [ "aho-corasick", "memchr", - "regex-automata", + "regex-automata 0.3.6", "regex-syntax", ] +[[package]] +name = "regex-automata" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9368763f5a9b804326f3af749e16f9abf378d227bcdee7634b13d8f17793782" +dependencies = [ + "memchr", +] + [[package]] name = "regex-automata" version = "0.3.6" @@ -3767,6 +4350,9 @@ name = "smallvec" version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" +dependencies = [ + "serde", +] [[package]] name = "smartstring" @@ -3882,6 +4468,18 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "synstructure" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "285ba80e733fac80aa4270fbcdf83772a79b80aa35c97075320abfee4a915b06" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", + "unicode-xid", +] + [[package]] name = "sysinfo" version = "0.29.7" @@ -3987,6 +4585,17 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinystr" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5d0e245e80bdc9b4e5356fc45a72184abbc3861992603f515270e9340f5a219" +dependencies = [ + "displaydoc", + "serde", + "zerovec 0.10.0", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -4257,12 +4866,24 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf16_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52df8b7fb78e7910d776fccf2e42ceaf3604d55e8e7eb2dbd183cb1441d8a692" + [[package]] name = "utf8-width" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" +[[package]] +name = "utf8_iter" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64a8922555b9500e3d865caed19330172cd67cbf82203f1a3311d8c305cc9f33" + [[package]] name = "utf8parse" version = "0.2.1" @@ -4647,6 +5268,18 @@ dependencies = [ "winapi", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0af0c3d13faebf8dda0b5256fa7096a2d5ccb662f7b9f54a40fe201077ab1c2" + [[package]] name = "xattr" version = "1.0.1" @@ -4681,6 +5314,30 @@ dependencies = [ "url", ] +[[package]] +name = "yoke" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61e38c508604d6bbbd292dadb3c02559aa7fff6b654a078a36217cad871636e4" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5e19fb6ed40002bab5403ffa37e53e0e56f914a4450c8765f533018db1db35f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", + "synstructure 0.13.0", +] + [[package]] name = "zerocopy" version = "0.3.0" @@ -4699,7 +5356,74 @@ checksum = "d498dbd1fd7beb83c86709ae1c33ca50942889473473d287d56ce4770a18edfb" dependencies = [ "proc-macro2", "syn 1.0.109", - "synstructure", + "synstructure 0.12.6", +] + +[[package]] +name = "zerofrom" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "655b0814c5c0b19ade497851070c640773304939a6c0fd5f5fb43da0696d05b7" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6a647510471d372f2e6c2e6b7219e44d8c574d24fdc11c610a61455782f18c3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", + "synstructure 0.13.0", +] + +[[package]] +name = "zerotrie" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9685bb4deb98dab812e87c296a9631fc00d7ca4bc5c2c5f304f375bbed711a8a" +dependencies = [ + "displaydoc", + "litemap 0.7.1", + "serde", + "yoke", + "zerofrom", + "zerovec 0.10.0", +] + +[[package]] +name = "zerovec" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "591691014119b87047ead4dcf3e6adfbf73cb7c38ab6980d4f18a32138f35d46" +dependencies = [ + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1194130c5b155bf8ae50ab16c86ab758cd695cf9ad176d2f870b744cbdbb572e" +dependencies = [ + "serde", + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acabf549809064225ff8878baedc4ce3732ac3b07e7c7ce6e5c2ccdbc485c324" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", ] [[package]] diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 147810ef4..639596fa6 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -50,6 +50,7 @@ hebrew = ["milli/hebrew"] japanese = ["milli/japanese"] # thai specialized tokenization thai = ["milli/thai"] - # allow greek specialized tokenization greek = ["milli/greek"] +# allow khmer specialized tokenization +khmer = ["milli/khmer"] diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 35a4a4304..e14116645 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -150,6 +150,7 @@ hebrew = ["meilisearch-types/hebrew"] japanese = ["meilisearch-types/japanese"] thai = ["meilisearch-types/thai"] greek = ["meilisearch-types/greek"] +khmer = ["meilisearch-types/khmer"] [package.metadata.mini-dashboard] assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip" diff --git a/milli/Cargo.toml b/milli/Cargo.toml index b19b40e85..cf5fe9726 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -17,7 +17,7 @@ bincode = "1.3.3" bstr = "1.4.0" bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] } byteorder = "1.4.3" -charabia = { version = "0.8.3", default-features = false } +charabia = { version = "0.8.5", default-features = false } concat-arrays = "0.1.2" crossbeam-channel = "0.5.8" deserr = { version = "0.6.0", features = ["actix-web"]} @@ -82,7 +82,7 @@ md5 = "0.7.0" rand = { version = "0.8.5", features = ["small_rng"] } [features] -all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"] +all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"] # Use POSIX semaphores instead of SysV semaphores in LMDB # For more information on this feature, see heed's Cargo.toml @@ -106,3 +106,6 @@ thai = ["charabia/thai"] # allow greek specialized tokenization greek = ["charabia/greek"] + +# allow khmer specialized tokenization +khmer = ["charabia/khmer"] From e7244aa4851e67b729bddc436a222ec8de8eccb2 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 30 Oct 2023 11:00:46 +0100 Subject: [PATCH 07/17] fix warnings --- meilisearch/tests/common/mod.rs | 2 ++ milli/src/search/facet/filter.rs | 2 +- milli/src/search/mod.rs | 2 +- milli/src/search/new/ranking_rule_graph/mod.rs | 2 +- milli/src/update/index_documents/helpers/mod.rs | 2 +- milli/src/update/index_documents/mod.rs | 5 +---- 6 files changed, 7 insertions(+), 8 deletions(-) diff --git a/meilisearch/tests/common/mod.rs b/meilisearch/tests/common/mod.rs index 9ee9b755e..d7888b7db 100644 --- a/meilisearch/tests/common/mod.rs +++ b/meilisearch/tests/common/mod.rs @@ -5,9 +5,11 @@ pub mod service; use std::fmt::{self, Display}; +#[allow(unused)] pub use index::{GetAllDocumentsOptions, GetDocumentOptions}; use meili_snap::json_string; use serde::{Deserialize, Serialize}; +#[allow(unused)] pub use server::{default_settings, Server}; #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index fac7b68ea..c4cdb37e6 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -3,7 +3,7 @@ use std::fmt::{Debug, Display}; use std::ops::Bound::{self, Excluded, Included}; use either::Either; -pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token}; +pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token}; use roaring::RoaringBitmap; use serde_json::Value; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 2c78acfdf..786b565ae 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -11,7 +11,7 @@ use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; -pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords}; +pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; use self::new::PartialSearchResult; use crate::error::UserError; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; diff --git a/milli/src/search/new/ranking_rule_graph/mod.rs b/milli/src/search/new/ranking_rule_graph/mod.rs index 209ec91de..c1b8df1b7 100644 --- a/milli/src/search/new/ranking_rule_graph/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/mod.rs @@ -29,7 +29,7 @@ use std::hash::Hash; pub use cheapest_paths::PathVisitor; pub use condition_docids_cache::ConditionDocIdsCache; pub use dead_ends_cache::DeadEndsCache; -pub use exactness::{ExactnessCondition, ExactnessGraph}; +pub use exactness::ExactnessGraph; pub use fid::{FidCondition, FidGraph}; pub use position::{PositionCondition, PositionGraph}; pub use proximity::{ProximityCondition, ProximityGraph}; diff --git a/milli/src/update/index_documents/helpers/mod.rs b/milli/src/update/index_documents/helpers/mod.rs index d59a3bc08..c403f9e3d 100644 --- a/milli/src/update/index_documents/helpers/mod.rs +++ b/milli/src/update/index_documents/helpers/mod.rs @@ -14,7 +14,7 @@ pub use grenad_helpers::{ }; pub use merge_functions::{ concat_u32s_array, keep_first, keep_latest_obkv, merge_btreeset_string, - merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs, + merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps, serialize_roaring_bitmap, MergeFn, }; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 52aa1113e..b3e7e203e 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -20,10 +20,7 @@ use slice_group_by::GroupBy; use typed_chunk::{write_typed_chunk_into_index, TypedChunk}; use self::enrich::enrich_documents_batch; -pub use self::enrich::{ - extract_finite_float_from_value, validate_document_id, validate_document_id_value, - validate_geo_from_json, DocumentId, -}; +pub use self::enrich::{extract_finite_float_from_value, DocumentId}; pub use self::helpers::{ as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset, fst_stream_into_vec, merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, From 13416ccbf70c76a507f70b266bd577f3623ce597 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 25 Oct 2023 10:49:50 +0200 Subject: [PATCH 08/17] Introduce a new meilitool to help the cloud team --- Cargo.lock | 118 ++++++++------ Cargo.toml | 1 + index-scheduler/src/lib.rs | 2 +- meilitool/Cargo.toml | 19 +++ meilitool/src/main.rs | 312 ++++++++++++++++++++++++++++++++++++ meilitool/src/uuid_codec.rs | 24 +++ 6 files changed, 426 insertions(+), 50 deletions(-) create mode 100644 meilitool/Cargo.toml create mode 100644 meilitool/src/main.rs create mode 100644 meilitool/src/uuid_codec.rs diff --git a/Cargo.lock b/Cargo.lock index bee967723..881a44197 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -310,16 +310,15 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.3.2" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" +checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44" dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", - "is-terminal", "utf8parse", ] @@ -349,9 +348,9 @@ dependencies = [ [[package]] name = "anstyle-wincon" -version = "1.0.1" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" +checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628" dependencies = [ "anstyle", "windows-sys 0.48.0", @@ -359,9 +358,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.72" +version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" dependencies = [ "backtrace", ] @@ -777,20 +776,19 @@ dependencies = [ [[package]] name = "clap" -version = "4.3.21" +version = "4.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c27cdf28c0f604ba3f512b0c9a409f8de8513e4816705deb0498b627e7c3a3fd" +checksum = "ac495e00dcec98c83465d5ad66c5c4fabd652fd6686e7c6269b117e729a6f17b" dependencies = [ "clap_builder", "clap_derive", - "once_cell", ] [[package]] name = "clap_builder" -version = "4.3.21" +version = "4.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08a9f1ab5e9f01a9b81f202e8562eb9a10de70abf9eaeac1be465c28b75aa4aa" +checksum = "c77ed9a32a62e6ca27175d00d29d05ca32e396ea1eb5fb01d8256b669cec7663" dependencies = [ "anstream", "anstyle", @@ -800,9 +798,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.3.12" +version = "4.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050" +checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" dependencies = [ "heck", "proc-macro2", @@ -812,9 +810,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" +checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" [[package]] name = "cobs" @@ -1114,10 +1112,11 @@ dependencies = [ [[package]] name = "deranged" -version = "0.3.7" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7684a49fb1af197853ef7b2ee694bc1f5b4179556f1e5710e1760c5db6f5e929" +checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" dependencies = [ + "powerfmt", "serde", ] @@ -1276,7 +1275,7 @@ dependencies = [ "tempfile", "thiserror", "time", - "uuid 1.4.1", + "uuid 1.5.0", ] [[package]] @@ -1478,7 +1477,7 @@ dependencies = [ "faux", "tempfile", "thiserror", - "uuid 1.4.1", + "uuid 1.5.0", ] [[package]] @@ -2073,9 +2072,9 @@ dependencies = [ [[package]] name = "icu_compactdecimal_data" -version = "1.3.2" +version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2e9b7585f26db531ea5aaedaa68cb66cd2be37fe698b33a289849ff3129545b" +checksum = "51cc4515902110b79d180c561c13b87e5b42bad85edf719a1d59ec713cd6ccf7" [[package]] name = "icu_datetime" @@ -2104,9 +2103,9 @@ dependencies = [ [[package]] name = "icu_datetime_data" -version = "1.3.2" +version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "078b2ed516a2f5054ee7f55b1fe970b92e90ae4cace8a0fe1e5f9fc2e94be609" +checksum = "ced82224d980ffebafebf443a85c062ac6e801a24415324d0f25962b088f55f4" [[package]] name = "icu_decimal" @@ -2126,9 +2125,9 @@ dependencies = [ [[package]] name = "icu_decimal_data" -version = "1.3.2" +version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c064b3828953151f8c610bfff6fec776f958641249ebfd1cf36f073f0654e77" +checksum = "20116c22b56b74384904ecd5e061fa7ece6e3eb26a48c524fc490ec8f46d26a2" [[package]] name = "icu_displaynames" @@ -2147,9 +2146,9 @@ dependencies = [ [[package]] name = "icu_displaynames_data" -version = "1.3.2" +version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60f9f56c427f1e80383667e8fb13c07707f6561839283115617cc67307a5d020" +checksum = "220c0ba83e42b255fef61ba9b78f22ba2ce1e27559a4029e3e24092b64f14a06" [[package]] name = "icu_list" @@ -2273,9 +2272,9 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "1.3.2" +version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c8bb3b67a8347e94d580434369e5c7ee89999b9309d04b7cfc88dfaa0f31b59" +checksum = "98507b488098f45eb95ef495612a2012e4d8ad6095dda86cb2f1728aa2204a60" [[package]] name = "icu_provider" @@ -2465,7 +2464,7 @@ dependencies = [ "tempfile", "thiserror", "time", - "uuid 1.4.1", + "uuid 1.5.0", ] [[package]] @@ -3153,7 +3152,7 @@ dependencies = [ "tokio-stream", "toml", "urlencoding", - "uuid 1.4.1", + "uuid 1.5.0", "vergen", "walkdir", "yaup", @@ -3176,7 +3175,7 @@ dependencies = [ "sha2", "thiserror", "time", - "uuid 1.4.1", + "uuid 1.5.0", ] [[package]] @@ -3206,7 +3205,21 @@ dependencies = [ "thiserror", "time", "tokio", - "uuid 1.4.1", + "uuid 1.5.0", +] + +[[package]] +name = "meilitool" +version = "1.5.0" +dependencies = [ + "anyhow", + "clap", + "dump", + "file-store", + "meilisearch-auth", + "meilisearch-types", + "time", + "uuid 1.5.0", ] [[package]] @@ -3286,7 +3299,7 @@ dependencies = [ "tempfile", "thiserror", "time", - "uuid 1.4.1", + "uuid 1.5.0", ] [[package]] @@ -3428,9 +3441,9 @@ dependencies = [ [[package]] name = "obkv" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f69e48cd7c8e5bb52a1da1287fdbfd877c32673176583ce664cd63b201aba385" +checksum = "6c459142426056c639ff88d053ebaaaeca0ee1411c94362892398ef4ccd81080" [[package]] name = "once_cell" @@ -3717,6 +3730,12 @@ dependencies = [ "serde", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -4185,9 +4204,9 @@ checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" [[package]] name = "serde" -version = "1.0.183" +version = "1.0.189" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32ac8da02677876d532745a130fc9d8e6edfa81a269b107c5b00829b91d8eb3c" +checksum = "8e422a44e74ad4001bdc8eede9a4570ab52f71190e9c076d14369f38b9200537" dependencies = [ "serde_derive", ] @@ -4212,9 +4231,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.183" +version = "1.0.189" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816" +checksum = "1e48d1f918009ce3145511378cf68d613e3b3d9137d67272562080d68a2b32d5" dependencies = [ "proc-macro2", "quote", @@ -4559,12 +4578,13 @@ dependencies = [ [[package]] name = "time" -version = "0.3.25" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fdd63d58b18d663fbdf70e049f00a22c8e42be082203be7f26589213cd75ea" +checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" dependencies = [ "deranged", "itoa", + "powerfmt", "serde", "time-core", "time-macros", @@ -4572,15 +4592,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.11" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb71511c991639bb078fd5bf97757e03914361c48100d52878b8e52b46fb92cd" +checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" dependencies = [ "time-core", ] @@ -4901,9 +4921,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.4.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" +checksum = "88ad59a7560b41a70d191093a945f0b87bc1deeda46fb237479708a1d6b6cdfc" dependencies = [ "getrandom", "serde", diff --git a/Cargo.toml b/Cargo.toml index cc16d50db..7b8fab8e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ resolver = "2" members = [ "meilisearch", + "meilitool", "meilisearch-types", "meilisearch-auth", "meili-snap", diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 43ac2355c..3c61880bb 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -27,7 +27,7 @@ mod index_mapper; mod insta_snapshot; mod lru; mod utils; -mod uuid_codec; +pub mod uuid_codec; pub type Result = std::result::Result; pub type TaskId = u32; diff --git a/meilitool/Cargo.toml b/meilitool/Cargo.toml new file mode 100644 index 000000000..7bd2f9eb8 --- /dev/null +++ b/meilitool/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "meilitool" +description = "A CLI to edit a Meilisearch database from the command line" +version.workspace = true +authors.workspace = true +homepage.workspace = true +readme.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +anyhow = "1.0.75" +clap = { version = "4.4.7", features = ["derive"] } +dump = { path = "../dump" } +file-store = { path = "../file-store" } +meilisearch-auth = { path = "../meilisearch-auth" } +meilisearch-types = { path = "../meilisearch-types" } +time = { version = "0.3.30", features = ["formatting"] } +uuid = { version = "1.5.0", features = ["v4"], default-features = false } diff --git a/meilitool/src/main.rs b/meilitool/src/main.rs new file mode 100644 index 000000000..2b40e42c2 --- /dev/null +++ b/meilitool/src/main.rs @@ -0,0 +1,312 @@ +use std::fs::{read_dir, read_to_string, remove_file, File}; +use std::io::BufWriter; +use std::path::PathBuf; + +use anyhow::Context; +use clap::{Parser, Subcommand}; +use dump::{DumpWriter, IndexMetadata}; +use file_store::FileStore; +use meilisearch_auth::AuthController; +use meilisearch_types::heed::types::{OwnedType, SerdeJson, Str}; +use meilisearch_types::heed::{Database, Env, EnvOpenOptions, PolyDatabase, RoTxn, RwTxn}; +use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; +use meilisearch_types::milli::{obkv_to_json, BEU32}; +use meilisearch_types::tasks::{Status, Task}; +use meilisearch_types::versioning::check_version_file; +use meilisearch_types::Index; +use time::macros::format_description; +use time::OffsetDateTime; +use uuid_codec::UuidCodec; + +mod uuid_codec; + +#[derive(Parser)] +#[command(author, version, about, long_about = None)] +struct Cli { + /// The database path where the Meilisearch is running. + #[arg(long, default_value = "data.ms/")] + db_path: PathBuf, + + #[command(subcommand)] + command: Command, +} + +#[derive(Subcommand)] +enum Command { + /// Clears the task queue and make it empty. + /// + /// This command can be safely executed even if Meilisearch is running and processing tasks. + /// Once the task queue is empty you can restart Meilisearch and no more tasks must be visible, + /// even the ones that were processing. However, it's highly possible that you see the processing + /// tasks in the queue again with an associated internal error message. + ClearTaskQueue, + + /// Exports a dump from the Meilisearch database. + /// + /// Make sure to run this command when Meilisearch is not running or running but not processing tasks. + /// If tasks are being processed while a dump is being exported there are chances for the dump to be + /// malformed with missing tasks. + /// + /// TODO Verify this claim or make sure it cannot happen and we can export dumps + /// without caring about killing Meilisearch first! + ExportADump { + /// The directory in which the dump will be created. + #[arg(long, default_value = "dumps/")] + dump_dir: PathBuf, + + /// Skip dumping the enqueued or processing tasks. + /// + /// Can be useful when there are a lot of them and it is not particularly useful + /// to keep them. Note that only the enqueued tasks takes up space so skipping + /// the processed ones is not particularly interesting. + #[arg(long)] + skip_enqueued_tasks: bool, + }, +} + +fn main() -> anyhow::Result<()> { + let Cli { db_path, command } = Cli::parse(); + + check_version_file(&db_path).context("While checking the version file")?; + + match command { + Command::ClearTaskQueue => clear_task_queue(db_path), + Command::ExportADump { dump_dir, skip_enqueued_tasks } => { + export_a_dump(db_path, dump_dir, skip_enqueued_tasks) + } + } +} + +/// Clears the task queue located at `db_path`. +fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> { + let path = db_path.join("tasks"); + let env = EnvOpenOptions::new() + .max_dbs(100) + .open(&path) + .with_context(|| format!("While trying to open {:?}", path.display()))?; + + eprintln!("Deleting tasks from the database..."); + + let mut wtxn = env.write_txn()?; + let all_tasks = try_opening_poly_database(&env, &wtxn, "all-tasks")?; + let total = all_tasks.len(&wtxn)?; + let status = try_opening_poly_database(&env, &wtxn, "status")?; + let kind = try_opening_poly_database(&env, &wtxn, "kind")?; + let index_tasks = try_opening_poly_database(&env, &wtxn, "index-tasks")?; + let canceled_by = try_opening_poly_database(&env, &wtxn, "canceled_by")?; + let enqueued_at = try_opening_poly_database(&env, &wtxn, "enqueued-at")?; + let started_at = try_opening_poly_database(&env, &wtxn, "started-at")?; + let finished_at = try_opening_poly_database(&env, &wtxn, "finished-at")?; + + try_clearing_poly_database(&mut wtxn, all_tasks, "all-tasks")?; + try_clearing_poly_database(&mut wtxn, status, "status")?; + try_clearing_poly_database(&mut wtxn, kind, "kind")?; + try_clearing_poly_database(&mut wtxn, index_tasks, "index-tasks")?; + try_clearing_poly_database(&mut wtxn, canceled_by, "canceled_by")?; + try_clearing_poly_database(&mut wtxn, enqueued_at, "enqueued-at")?; + try_clearing_poly_database(&mut wtxn, started_at, "started-at")?; + try_clearing_poly_database(&mut wtxn, finished_at, "finished-at")?; + + wtxn.commit().context("While committing the transaction")?; + + eprintln!("Successfully deleted {total} tasks from the tasks database!"); + eprintln!("Deleting the content files from disk..."); + + let mut count = 0usize; + let update_files = db_path.join("update_files"); + let entries = read_dir(&update_files).with_context(|| { + format!("While trying to read the content of {:?}", update_files.display()) + })?; + for result in entries { + match result { + Ok(ent) => match remove_file(ent.path()) { + Ok(_) => count += 1, + Err(e) => eprintln!("Error while deleting {:?}: {}", ent.path().display(), e), + }, + Err(e) => { + eprintln!("Error while reading a file in {:?}: {}", update_files.display(), e) + } + } + } + + eprintln!("Sucessfully deleted {count} content files from disk!"); + + Ok(()) +} + +fn try_opening_database( + env: &Env, + rtxn: &RoTxn, + db_name: &str, +) -> anyhow::Result> { + env.open_database(rtxn, Some(db_name)) + .with_context(|| format!("While opening the {db_name:?} database"))? + .with_context(|| format!("Missing the {db_name:?} database")) +} + +fn try_opening_poly_database( + env: &Env, + rtxn: &RoTxn, + db_name: &str, +) -> anyhow::Result { + env.open_poly_database(rtxn, Some(db_name)) + .with_context(|| format!("While opening the {db_name:?} poly database"))? + .with_context(|| format!("Missing the {db_name:?} poly database")) +} + +fn try_clearing_poly_database( + wtxn: &mut RwTxn, + database: PolyDatabase, + db_name: &str, +) -> anyhow::Result<()> { + database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database")) +} + +/// Exports a dump into the dump directory. +fn export_a_dump( + db_path: PathBuf, + dump_dir: PathBuf, + skip_enqueued_tasks: bool, +) -> Result<(), anyhow::Error> { + let started_at = OffsetDateTime::now_utc(); + + // 1. Extracts the instance UID from disk + let instance_uid_path = db_path.join("instance-uid"); + let instance_uid = match read_to_string(&instance_uid_path) { + Ok(content) => match content.trim().parse() { + Ok(uuid) => Some(uuid), + Err(e) => { + eprintln!("Impossible to parse instance-uid: {e}"); + None + } + }, + Err(e) => { + eprintln!("Impossible to read {}: {}", instance_uid_path.display(), e); + None + } + }; + + let dump = DumpWriter::new(instance_uid).context("While creating a new dump")?; + let file_store = + FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?; + + let index_scheduler_path = db_path.join("tasks"); + let env = EnvOpenOptions::new() + .max_dbs(100) + .open(&index_scheduler_path) + .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; + + eprintln!("Dumping the keys..."); + + // 2. dump the keys + let auth_store = AuthController::new(&db_path, &None) + .with_context(|| format!("While opening the auth store at {}", db_path.display()))?; + let mut dump_keys = dump.create_keys()?; + let mut count = 0; + for key in auth_store.list_keys()? { + dump_keys.push_key(&key)?; + count += 1; + } + dump_keys.flush()?; + + eprintln!("Successfully dumped {count} keys!"); + + let rtxn = env.read_txn()?; + let all_tasks: Database, SerdeJson> = + try_opening_database(&env, &rtxn, "all-tasks")?; + let index_mapping: Database = + try_opening_database(&env, &rtxn, "index-mapping")?; + + if skip_enqueued_tasks { + eprintln!("Skip dumping the enqueued tasks..."); + } else { + eprintln!("Dumping the enqueued tasks..."); + + // 3. dump the tasks + let mut dump_tasks = dump.create_tasks_queue()?; + let mut count = 0; + for ret in all_tasks.iter(&rtxn)? { + let (_, t) = ret?; + let status = t.status; + let content_file = t.content_uuid(); + let mut dump_content_file = dump_tasks.push_task(&t.into())?; + + // 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. + if let Some(content_file_uuid) = content_file { + if status == Status::Enqueued { + let content_file = file_store.get_update(content_file_uuid)?; + + let reader = + DocumentsBatchReader::from_reader(content_file).with_context(|| { + format!("While reading content file {:?}", content_file_uuid) + })?; + + let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index(); + while let Some(doc) = cursor.next_document().with_context(|| { + format!("While iterating on content file {:?}", content_file_uuid) + })? { + dump_content_file + .push_document(&obkv_to_object(&doc, &documents_batch_index)?)?; + } + dump_content_file.flush()?; + count += 1; + } + } + } + dump_tasks.flush()?; + + eprintln!("Successfully dumped {count} enqueued tasks!"); + } + + eprintln!("Dumping the indexes..."); + + // 4. Dump the indexes + let mut count = 0; + for result in index_mapping.iter(&rtxn)? { + let (uid, uuid) = result?; + let index_path = db_path.join("indexes").join(uuid.to_string()); + let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| { + format!("While trying to open the index at path {:?}", index_path.display()) + })?; + + let rtxn = index.read_txn()?; + let metadata = IndexMetadata { + uid: uid.to_owned(), + primary_key: index.primary_key(&rtxn)?.map(String::from), + created_at: index.created_at(&rtxn)?, + updated_at: index.updated_at(&rtxn)?, + }; + let mut index_dumper = dump.create_index(uid, &metadata)?; + + let fields_ids_map = index.fields_ids_map(&rtxn)?; + let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); + + // 4.1. Dump the documents + for ret in index.all_documents(&rtxn)? { + let (_id, doc) = ret?; + let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?; + index_dumper.push_document(&document)?; + } + + // 4.2. Dump the settings + let settings = meilisearch_types::settings::settings(&index, &rtxn)?; + index_dumper.settings(&settings)?; + count += 1; + } + + eprintln!("Successfully dumped {count} indexes!"); + // We will not dump experimental feature settings + eprintln!("The tool is not dumping experimental features, please set them by hand afterward"); + + let dump_uid = started_at.format(format_description!( + "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" + )).unwrap(); + + let path = dump_dir.join(format!("{}.dump", dump_uid)); + let file = File::create(&path)?; + dump.persist_to(BufWriter::new(file))?; + + eprintln!("Dump exported at path {:?}", path.display()); + + Ok(()) +} diff --git a/meilitool/src/uuid_codec.rs b/meilitool/src/uuid_codec.rs new file mode 100644 index 000000000..70a92ca94 --- /dev/null +++ b/meilitool/src/uuid_codec.rs @@ -0,0 +1,24 @@ +use std::borrow::Cow; +use std::convert::TryInto; + +use meilisearch_types::heed::{BytesDecode, BytesEncode}; +use uuid::Uuid; + +/// A heed codec for value of struct Uuid. +pub struct UuidCodec; + +impl<'a> BytesDecode<'a> for UuidCodec { + type DItem = Uuid; + + fn bytes_decode(bytes: &'a [u8]) -> Option { + bytes.try_into().ok().map(Uuid::from_bytes) + } +} + +impl BytesEncode<'_> for UuidCodec { + type EItem = Uuid; + + fn bytes_encode(item: &Self::EItem) -> Option> { + Some(Cow::Borrowed(item.as_bytes())) + } +} From 5b004a2583af8333d466170c0cd3be989b16de49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 30 Oct 2023 14:23:41 +0100 Subject: [PATCH 09/17] Add more logs to the dump exporter --- meilitool/src/main.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/meilitool/src/main.rs b/meilitool/src/main.rs index 2b40e42c2..e7c9674bb 100644 --- a/meilitool/src/main.rs +++ b/meilitool/src/main.rs @@ -260,6 +260,9 @@ fn export_a_dump( eprintln!("Dumping the indexes..."); + eprintln!("Successfully dumped {count} tasks!"); + eprintln!("Dumping the indexes..."); + // 4. Dump the indexes let mut count = 0; for result in index_mapping.iter(&rtxn)? { From 53382bb1b80f3dc45d68970d631cf4441bd14656 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 30 Oct 2023 14:06:39 +0100 Subject: [PATCH 10/17] Introduce a new flag to skip dumping enqueued/processing tasks --- meilitool/src/main.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/meilitool/src/main.rs b/meilitool/src/main.rs index e7c9674bb..2b40e42c2 100644 --- a/meilitool/src/main.rs +++ b/meilitool/src/main.rs @@ -260,9 +260,6 @@ fn export_a_dump( eprintln!("Dumping the indexes..."); - eprintln!("Successfully dumped {count} tasks!"); - eprintln!("Dumping the indexes..."); - // 4. Dump the indexes let mut count = 0; for result in index_mapping.iter(&rtxn)? { From f7ea94e5f4aacc52b73a97c7959b7963e130e7b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 30 Oct 2023 14:17:15 +0100 Subject: [PATCH 11/17] Modify the Dockerfile to compile meilisearch and meilitool --- Cargo.lock | 118 ++++++++++++++++++++++------------------------------- Dockerfile | 9 ++-- 2 files changed, 54 insertions(+), 73 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 881a44197..bee967723 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -310,15 +310,16 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.4" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44" +checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", + "is-terminal", "utf8parse", ] @@ -348,9 +349,9 @@ dependencies = [ [[package]] name = "anstyle-wincon" -version = "3.0.1" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628" +checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" dependencies = [ "anstyle", "windows-sys 0.48.0", @@ -358,9 +359,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.75" +version = "1.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" +checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" dependencies = [ "backtrace", ] @@ -776,19 +777,20 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.7" +version = "4.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac495e00dcec98c83465d5ad66c5c4fabd652fd6686e7c6269b117e729a6f17b" +checksum = "c27cdf28c0f604ba3f512b0c9a409f8de8513e4816705deb0498b627e7c3a3fd" dependencies = [ "clap_builder", "clap_derive", + "once_cell", ] [[package]] name = "clap_builder" -version = "4.4.7" +version = "4.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c77ed9a32a62e6ca27175d00d29d05ca32e396ea1eb5fb01d8256b669cec7663" +checksum = "08a9f1ab5e9f01a9b81f202e8562eb9a10de70abf9eaeac1be465c28b75aa4aa" dependencies = [ "anstream", "anstyle", @@ -798,9 +800,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.4.7" +version = "4.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" +checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050" dependencies = [ "heck", "proc-macro2", @@ -810,9 +812,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.6.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" +checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" [[package]] name = "cobs" @@ -1112,11 +1114,10 @@ dependencies = [ [[package]] name = "deranged" -version = "0.3.9" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" +checksum = "7684a49fb1af197853ef7b2ee694bc1f5b4179556f1e5710e1760c5db6f5e929" dependencies = [ - "powerfmt", "serde", ] @@ -1275,7 +1276,7 @@ dependencies = [ "tempfile", "thiserror", "time", - "uuid 1.5.0", + "uuid 1.4.1", ] [[package]] @@ -1477,7 +1478,7 @@ dependencies = [ "faux", "tempfile", "thiserror", - "uuid 1.5.0", + "uuid 1.4.1", ] [[package]] @@ -2072,9 +2073,9 @@ dependencies = [ [[package]] name = "icu_compactdecimal_data" -version = "1.3.4" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51cc4515902110b79d180c561c13b87e5b42bad85edf719a1d59ec713cd6ccf7" +checksum = "c2e9b7585f26db531ea5aaedaa68cb66cd2be37fe698b33a289849ff3129545b" [[package]] name = "icu_datetime" @@ -2103,9 +2104,9 @@ dependencies = [ [[package]] name = "icu_datetime_data" -version = "1.3.4" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced82224d980ffebafebf443a85c062ac6e801a24415324d0f25962b088f55f4" +checksum = "078b2ed516a2f5054ee7f55b1fe970b92e90ae4cace8a0fe1e5f9fc2e94be609" [[package]] name = "icu_decimal" @@ -2125,9 +2126,9 @@ dependencies = [ [[package]] name = "icu_decimal_data" -version = "1.3.4" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20116c22b56b74384904ecd5e061fa7ece6e3eb26a48c524fc490ec8f46d26a2" +checksum = "3c064b3828953151f8c610bfff6fec776f958641249ebfd1cf36f073f0654e77" [[package]] name = "icu_displaynames" @@ -2146,9 +2147,9 @@ dependencies = [ [[package]] name = "icu_displaynames_data" -version = "1.3.4" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "220c0ba83e42b255fef61ba9b78f22ba2ce1e27559a4029e3e24092b64f14a06" +checksum = "60f9f56c427f1e80383667e8fb13c07707f6561839283115617cc67307a5d020" [[package]] name = "icu_list" @@ -2272,9 +2273,9 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "1.3.4" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98507b488098f45eb95ef495612a2012e4d8ad6095dda86cb2f1728aa2204a60" +checksum = "7c8bb3b67a8347e94d580434369e5c7ee89999b9309d04b7cfc88dfaa0f31b59" [[package]] name = "icu_provider" @@ -2464,7 +2465,7 @@ dependencies = [ "tempfile", "thiserror", "time", - "uuid 1.5.0", + "uuid 1.4.1", ] [[package]] @@ -3152,7 +3153,7 @@ dependencies = [ "tokio-stream", "toml", "urlencoding", - "uuid 1.5.0", + "uuid 1.4.1", "vergen", "walkdir", "yaup", @@ -3175,7 +3176,7 @@ dependencies = [ "sha2", "thiserror", "time", - "uuid 1.5.0", + "uuid 1.4.1", ] [[package]] @@ -3205,21 +3206,7 @@ dependencies = [ "thiserror", "time", "tokio", - "uuid 1.5.0", -] - -[[package]] -name = "meilitool" -version = "1.5.0" -dependencies = [ - "anyhow", - "clap", - "dump", - "file-store", - "meilisearch-auth", - "meilisearch-types", - "time", - "uuid 1.5.0", + "uuid 1.4.1", ] [[package]] @@ -3299,7 +3286,7 @@ dependencies = [ "tempfile", "thiserror", "time", - "uuid 1.5.0", + "uuid 1.4.1", ] [[package]] @@ -3441,9 +3428,9 @@ dependencies = [ [[package]] name = "obkv" -version = "0.2.1" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c459142426056c639ff88d053ebaaaeca0ee1411c94362892398ef4ccd81080" +checksum = "f69e48cd7c8e5bb52a1da1287fdbfd877c32673176583ce664cd63b201aba385" [[package]] name = "once_cell" @@ -3730,12 +3717,6 @@ dependencies = [ "serde", ] -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - [[package]] name = "ppv-lite86" version = "0.2.17" @@ -4204,9 +4185,9 @@ checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" [[package]] name = "serde" -version = "1.0.189" +version = "1.0.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e422a44e74ad4001bdc8eede9a4570ab52f71190e9c076d14369f38b9200537" +checksum = "32ac8da02677876d532745a130fc9d8e6edfa81a269b107c5b00829b91d8eb3c" dependencies = [ "serde_derive", ] @@ -4231,9 +4212,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.189" +version = "1.0.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e48d1f918009ce3145511378cf68d613e3b3d9137d67272562080d68a2b32d5" +checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816" dependencies = [ "proc-macro2", "quote", @@ -4578,13 +4559,12 @@ dependencies = [ [[package]] name = "time" -version = "0.3.30" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" +checksum = "b0fdd63d58b18d663fbdf70e049f00a22c8e42be082203be7f26589213cd75ea" dependencies = [ "deranged", "itoa", - "powerfmt", "serde", "time-core", "time-macros", @@ -4592,15 +4572,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.2" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" +checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" [[package]] name = "time-macros" -version = "0.2.15" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" +checksum = "eb71511c991639bb078fd5bf97757e03914361c48100d52878b8e52b46fb92cd" dependencies = [ "time-core", ] @@ -4921,9 +4901,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.5.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ad59a7560b41a70d191093a945f0b87bc1deeda46fb237479708a1d6b6cdfc" +checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" dependencies = [ "getrandom", "serde", diff --git a/Dockerfile b/Dockerfile index 70950f338..46e98bdaf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM rust:alpine3.16 AS compiler RUN apk add -q --update-cache --no-cache build-base openssl-dev -WORKDIR /meilisearch +WORKDIR / ARG COMMIT_SHA ARG COMMIT_DATE @@ -17,7 +17,7 @@ RUN set -eux; \ if [ "$apkArch" = "aarch64" ]; then \ export JEMALLOC_SYS_WITH_LG_PAGE=16; \ fi && \ - cargo build --release + cargo build --release -p meilisearch -p meilitool # Run FROM alpine:3.16 @@ -28,9 +28,10 @@ ENV MEILI_SERVER_PROVIDER docker RUN apk update --quiet \ && apk add -q --no-cache libgcc tini curl -# add meilisearch to the `/bin` so you can run it from anywhere and it's easy -# to find. +# add meilisearch and meilitool to the `/bin` so you can run it from anywhere +# and it's easy to find. COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch +COPY --from=compiler /meilisearch/target/release/meilitool /bin/meilitool # To stay compatible with the older version of the container (pre v0.27.0) we're # going to symlink the meilisearch binary in the path to `/meilisearch` RUN ln -s /bin/meilisearch /meilisearch From b57b818b67b0a6fd022fd97c04c5b631cdedc623 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 30 Oct 2023 16:32:32 +0100 Subject: [PATCH 12/17] Don't use the last version of clap --- Cargo.lock | 68 +++++++++++++++++++++++++++++--------------- meilitool/Cargo.toml | 2 +- 2 files changed, 46 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bee967723..017257512 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -359,9 +359,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.72" +version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" dependencies = [ "backtrace", ] @@ -1114,10 +1114,11 @@ dependencies = [ [[package]] name = "deranged" -version = "0.3.7" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7684a49fb1af197853ef7b2ee694bc1f5b4179556f1e5710e1760c5db6f5e929" +checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" dependencies = [ + "powerfmt", "serde", ] @@ -1276,7 +1277,7 @@ dependencies = [ "tempfile", "thiserror", "time", - "uuid 1.4.1", + "uuid 1.5.0", ] [[package]] @@ -1478,7 +1479,7 @@ dependencies = [ "faux", "tempfile", "thiserror", - "uuid 1.4.1", + "uuid 1.5.0", ] [[package]] @@ -2465,7 +2466,7 @@ dependencies = [ "tempfile", "thiserror", "time", - "uuid 1.4.1", + "uuid 1.5.0", ] [[package]] @@ -3153,7 +3154,7 @@ dependencies = [ "tokio-stream", "toml", "urlencoding", - "uuid 1.4.1", + "uuid 1.5.0", "vergen", "walkdir", "yaup", @@ -3176,7 +3177,7 @@ dependencies = [ "sha2", "thiserror", "time", - "uuid 1.4.1", + "uuid 1.5.0", ] [[package]] @@ -3206,7 +3207,21 @@ dependencies = [ "thiserror", "time", "tokio", - "uuid 1.4.1", + "uuid 1.5.0", +] + +[[package]] +name = "meilitool" +version = "1.5.0" +dependencies = [ + "anyhow", + "clap", + "dump", + "file-store", + "meilisearch-auth", + "meilisearch-types", + "time", + "uuid 1.5.0", ] [[package]] @@ -3286,7 +3301,7 @@ dependencies = [ "tempfile", "thiserror", "time", - "uuid 1.4.1", + "uuid 1.5.0", ] [[package]] @@ -3717,6 +3732,12 @@ dependencies = [ "serde", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -4185,9 +4206,9 @@ checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" [[package]] name = "serde" -version = "1.0.183" +version = "1.0.190" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32ac8da02677876d532745a130fc9d8e6edfa81a269b107c5b00829b91d8eb3c" +checksum = "91d3c334ca1ee894a2c6f6ad698fe8c435b76d504b13d436f0685d648d6d96f7" dependencies = [ "serde_derive", ] @@ -4212,9 +4233,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.183" +version = "1.0.190" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816" +checksum = "67c5609f394e5c2bd7fc51efda478004ea80ef42fee983d5c67a65e34f32c0e3" dependencies = [ "proc-macro2", "quote", @@ -4559,12 +4580,13 @@ dependencies = [ [[package]] name = "time" -version = "0.3.25" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fdd63d58b18d663fbdf70e049f00a22c8e42be082203be7f26589213cd75ea" +checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" dependencies = [ "deranged", "itoa", + "powerfmt", "serde", "time-core", "time-macros", @@ -4572,15 +4594,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.11" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb71511c991639bb078fd5bf97757e03914361c48100d52878b8e52b46fb92cd" +checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" dependencies = [ "time-core", ] @@ -4901,9 +4923,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.4.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" +checksum = "88ad59a7560b41a70d191093a945f0b87bc1deeda46fb237479708a1d6b6cdfc" dependencies = [ "getrandom", "serde", diff --git a/meilitool/Cargo.toml b/meilitool/Cargo.toml index 7bd2f9eb8..58acd87db 100644 --- a/meilitool/Cargo.toml +++ b/meilitool/Cargo.toml @@ -10,7 +10,7 @@ license.workspace = true [dependencies] anyhow = "1.0.75" -clap = { version = "4.4.7", features = ["derive"] } +clap = { version = "4.2.1", features = ["derive"] } dump = { path = "../dump" } file-store = { path = "../file-store" } meilisearch-auth = { path = "../meilisearch-auth" } From ce5647e73087321d703525ef3ea13d3ae2dcfdbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 30 Oct 2023 17:27:59 +0100 Subject: [PATCH 13/17] Fix Dockerfile WORKDIR path --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 46e98bdaf..bf98cbeca 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,8 +30,8 @@ RUN apk update --quiet \ # add meilisearch and meilitool to the `/bin` so you can run it from anywhere # and it's easy to find. -COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch -COPY --from=compiler /meilisearch/target/release/meilitool /bin/meilitool +COPY --from=compiler /target/release/meilisearch /bin/meilisearch +COPY --from=compiler /target/release/meilitool /bin/meilitool # To stay compatible with the older version of the container (pre v0.27.0) we're # going to symlink the meilisearch binary in the path to `/meilisearch` RUN ln -s /bin/meilisearch /meilisearch From a2d0c73b411844b49f37b6f9a9cbc02f26765c5e Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Fri, 10 Nov 2023 10:50:19 +0100 Subject: [PATCH 14/17] Save the currently updating index so that the search can access it at all times --- index-scheduler/src/batch.rs | 6 ++++++ index-scheduler/src/insta_snapshot.rs | 1 + index-scheduler/src/lib.rs | 14 ++++++++++++++ 3 files changed, 21 insertions(+) diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 3e2cc4281..264afadae 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -923,6 +923,9 @@ impl IndexScheduler { self.index_mapper.index(&rtxn, &index_uid)? }; + // the index operation can take a long time, so save this handle to make it available tothe search for the duration of the tick + *self.currently_updating_index.write().unwrap() = Some((index_uid.clone(), index.clone())); + let mut index_wtxn = index.write_txn()?; let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?; index_wtxn.commit()?; @@ -959,6 +962,9 @@ impl IndexScheduler { Batch::IndexUpdate { index_uid, primary_key, mut task } => { let rtxn = self.env.read_txn()?; let index = self.index_mapper.index(&rtxn, &index_uid)?; + // the index update can take a long time, so save this handle to make it available tothe search for the duration of the tick + *self.currently_updating_index.write().unwrap() = Some((index_uid.clone(), index.clone())); + if let Some(primary_key) = primary_key.clone() { let mut index_wtxn = index.write_txn()?; diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs index f820ce99d..6096bad38 100644 --- a/index-scheduler/src/insta_snapshot.rs +++ b/index-scheduler/src/insta_snapshot.rs @@ -39,6 +39,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { test_breakpoint_sdr: _, planned_failures: _, run_loop_iteration: _, + currently_updating_index: _, } = scheduler; let rtxn = env.read_txn().unwrap(); diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 3c61880bb..0194bdb9d 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -331,6 +331,10 @@ pub struct IndexScheduler { /// The path to the version file of Meilisearch. pub(crate) version_file_path: PathBuf, + /// A few types of long running batches of tasks that act on a single index set this field + /// so that a handle to the index is available from other threads (search) in an optimized manner. + currently_updating_index: Arc>>, + // ================= test // The next entry is dedicated to the tests. /// Provide a way to set a breakpoint in multiple part of the scheduler. @@ -374,6 +378,7 @@ impl IndexScheduler { dumps_path: self.dumps_path.clone(), auth_path: self.auth_path.clone(), version_file_path: self.version_file_path.clone(), + currently_updating_index: self.currently_updating_index.clone(), #[cfg(test)] test_breakpoint_sdr: self.test_breakpoint_sdr.clone(), #[cfg(test)] @@ -470,6 +475,7 @@ impl IndexScheduler { snapshots_path: options.snapshots_path, auth_path: options.auth_path, version_file_path: options.version_file_path, + currently_updating_index: Arc::new(RwLock::new(None)), #[cfg(test)] test_breakpoint_sdr, @@ -652,6 +658,11 @@ impl IndexScheduler { /// If you need to fetch information from or perform an action on all indexes, /// see the `try_for_each_index` function. pub fn index(&self, name: &str) -> Result { + if let Some((current_name, current_index)) = self.currently_updating_index.read().unwrap().as_ref() { + if current_name == name { + return Ok(current_index.clone()) + } + } let rtxn = self.env.read_txn()?; self.index_mapper.index(&rtxn, name) } @@ -1133,6 +1144,9 @@ impl IndexScheduler { handle.join().unwrap_or(Err(Error::ProcessBatchPanicked)) }; + // Reset the currently updating index to relinquish the index handle + *self.currently_updating_index.write().unwrap() = None; + #[cfg(test)] self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?; From 492fc086f0a37fffb7d3db34880820c46115434f Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Sun, 12 Nov 2023 21:53:11 +0100 Subject: [PATCH 15/17] cargo fmt --- index-scheduler/src/batch.rs | 7 ++++--- index-scheduler/src/lib.rs | 6 ++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 264afadae..96cb85562 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -924,7 +924,8 @@ impl IndexScheduler { }; // the index operation can take a long time, so save this handle to make it available tothe search for the duration of the tick - *self.currently_updating_index.write().unwrap() = Some((index_uid.clone(), index.clone())); + *self.currently_updating_index.write().unwrap() = + Some((index_uid.clone(), index.clone())); let mut index_wtxn = index.write_txn()?; let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?; @@ -963,8 +964,8 @@ impl IndexScheduler { let rtxn = self.env.read_txn()?; let index = self.index_mapper.index(&rtxn, &index_uid)?; // the index update can take a long time, so save this handle to make it available tothe search for the duration of the tick - *self.currently_updating_index.write().unwrap() = Some((index_uid.clone(), index.clone())); - + *self.currently_updating_index.write().unwrap() = + Some((index_uid.clone(), index.clone())); if let Some(primary_key) = primary_key.clone() { let mut index_wtxn = index.write_txn()?; diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 0194bdb9d..95902aa15 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -658,9 +658,11 @@ impl IndexScheduler { /// If you need to fetch information from or perform an action on all indexes, /// see the `try_for_each_index` function. pub fn index(&self, name: &str) -> Result { - if let Some((current_name, current_index)) = self.currently_updating_index.read().unwrap().as_ref() { + if let Some((current_name, current_index)) = + self.currently_updating_index.read().unwrap().as_ref() + { if current_name == name { - return Ok(current_index.clone()) + return Ok(current_index.clone()); } } let rtxn = self.env.read_txn()?; From 8c649d8061a424852d8a040ed7726157c0dbcc19 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 9 Nov 2023 17:45:04 +0100 Subject: [PATCH 16/17] Throw error when the vector search is sent with the wrong size --- milli/src/search/new/mod.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 361804426..6ceb78223 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -434,7 +434,18 @@ pub fn execute_search( let mut search = Search::default(); let docids = match ctx.index.vector_hnsw(ctx.txn)? { Some(hnsw) => { + if let Some(expected_size) = hnsw.iter().map(|(_, point)| point.len()).next() { + if vector.len() != expected_size { + return Err(UserError::InvalidVectorDimensions { + expected: expected_size, + found: vector.len(), + } + .into()); + } + } + let vector = NDotProductPoint::new(vector.clone()); + let neighbors = hnsw.search(&vector, &mut search); let mut docids = Vec::new(); From a2d6dc857112ff10bc9f72f770d36f0a745ccd3e Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 13 Nov 2023 10:44:36 +0100 Subject: [PATCH 17/17] Fix typo, remove caching for the change of index --- index-scheduler/src/batch.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 96cb85562..aa93cda2a 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -923,7 +923,7 @@ impl IndexScheduler { self.index_mapper.index(&rtxn, &index_uid)? }; - // the index operation can take a long time, so save this handle to make it available tothe search for the duration of the tick + // the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick *self.currently_updating_index.write().unwrap() = Some((index_uid.clone(), index.clone())); @@ -963,9 +963,6 @@ impl IndexScheduler { Batch::IndexUpdate { index_uid, primary_key, mut task } => { let rtxn = self.env.read_txn()?; let index = self.index_mapper.index(&rtxn, &index_uid)?; - // the index update can take a long time, so save this handle to make it available tothe search for the duration of the tick - *self.currently_updating_index.write().unwrap() = - Some((index_uid.clone(), index.clone())); if let Some(primary_key) = primary_key.clone() { let mut index_wtxn = index.write_txn()?;