diff --git a/Cargo.lock b/Cargo.lock index edb937d85..b1e2ac465 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -620,6 +620,17 @@ dependencies = [ "vec_map", ] +[[package]] +name = "concat-arrays" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1df715824eb382e34b7afb7463b0247bf41538aeba731fba05241ecdb5dc3747" +dependencies = [ + "proc-macro2 1.0.27", + "quote 1.0.9", + "syn 1.0.73", +] + [[package]] name = "const_fn" version = "0.4.8" @@ -1191,7 +1202,7 @@ dependencies = [ [[package]] name = "heed" version = "0.12.0" -source = "git+https://github.com/Kerollmops/heed?tag=v0.12.0#6c0b95793a805dc598f05c119494e6c069de0326" +source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#8e5dc6d71c8166a8d7d0db059e6e51478942b551" dependencies = [ "byteorder", "heed-traits", @@ -1209,12 +1220,12 @@ dependencies = [ [[package]] name = "heed-traits" version = "0.7.0" -source = "git+https://github.com/Kerollmops/heed?tag=v0.12.0#6c0b95793a805dc598f05c119494e6c069de0326" +source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#8e5dc6d71c8166a8d7d0db059e6e51478942b551" [[package]] name = "heed-types" version = "0.7.2" -source = "git+https://github.com/Kerollmops/heed?tag=v0.12.0#6c0b95793a805dc598f05c119494e6c069de0326" +source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#8e5dc6d71c8166a8d7d0db059e6e51478942b551" dependencies = [ "bincode", "heed-traits", @@ -1647,7 +1658,7 @@ dependencies = [ "log", "main_error", "meilisearch-error", - "meilisearch-tokenizer", + "meilisearch-tokenizer 0.2.3", "memmap", "milli", "mime", @@ -1699,7 +1710,23 @@ dependencies = [ "once_cell", "slice-group-by", "unicode-segmentation", - "whatlang", + "whatlang 0.9.0", +] + +[[package]] +name = "meilisearch-tokenizer" +version = "0.2.4" +source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.2.4#135d08dce465a756abaf6a1bcad70f315bda99b9" +dependencies = [ + "character_converter", + "cow-utils", + "deunicode", + "fst", + "jieba-rs", + "once_cell", + "slice-group-by", + "unicode-segmentation", + "whatlang 0.12.0", ] [[package]] @@ -1729,12 +1756,13 @@ dependencies = [ [[package]] name = "milli" -version = "0.7.2" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.7.2#007fec21fcb3accad807aa2f2cf66c168afb043f" +version = "0.8.0" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.8.0#200e98c211510750e2ead475b8571b533ac6caa1" dependencies = [ "bstr", "byteorder", "chrono", + "concat-arrays", "csv", "either", "flate2", @@ -1748,7 +1776,7 @@ dependencies = [ "linked-hash-map", "log", "logging_timer", - "meilisearch-tokenizer", + "meilisearch-tokenizer 0.2.4", "memmap", "obkv", "once_cell", @@ -1910,9 +1938,9 @@ dependencies = [ [[package]] name = "obkv" -version = "0.1.1" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd8a5a0aa2f3adafe349259a5b3e21a19c388b792414c1161d60a69c1fa48e8" +checksum = "f69e48cd7c8e5bb52a1da1287fdbfd877c32673176583ce664cd63b201aba385" [[package]] name = "once_cell" @@ -3532,6 +3560,15 @@ dependencies = [ "hashbrown 0.7.2", ] +[[package]] +name = "whatlang" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a346d2eb29c03618693ed24a29d1acd0c3f2cb08ae58b9669d7461e033cf703" +dependencies = [ + "hashbrown 0.7.2", +] + [[package]] name = "whoami" version = "1.1.2" diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index f0186801a..6191ca2bb 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -42,7 +42,7 @@ fst = "0.4.5" futures = "0.3.7" futures-util = "0.3.8" grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" } -heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.0" } +heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" } http = "0.2.1" indexmap = { version = "1.3.2", features = ["serde-1"] } itertools = "0.10.0" @@ -51,7 +51,7 @@ main_error = "0.1.0" meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.3" } memmap = "0.7.0" -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.7.2" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.8.0" } mime = "0.3.16" num_cpus = "1.13.0" once_cell = "1.5.2" @@ -73,7 +73,7 @@ thiserror = "1.0.24" tokio = { version = "1", features = ["full"] } uuid = { version = "0.8.2", features = ["serde"] } walkdir = "2.3.2" -obkv = "0.1.1" +obkv = "0.2.0" pin-project = "1.0.7" whoami = { version = "1.1.2", optional = true } reqwest = { version = "0.11.3", features = ["json", "rustls-tls"], default-features = false, optional = true } diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index 85eff2746..4c963f1f5 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -6,7 +6,7 @@ use std::path::Path; use std::sync::Arc; use heed::{EnvOpenOptions, RoTxn}; -use milli::obkv_to_json; +use milli::{obkv_to_json, FieldId}; use serde::{de::Deserializer, Deserialize}; use serde_json::{Map, Value}; @@ -174,7 +174,7 @@ impl Index { txn: &heed::RoTxn, attributes_to_retrieve: &Option>, fields_ids_map: &milli::FieldsIdsMap, - ) -> Result> { + ) -> Result> { let mut displayed_fields_ids = match self.displayed_fields_ids(&txn)? { Some(ids) => ids.into_iter().collect::>(), None => fields_ids_map.iter().map(|(id, _)| id).collect(), diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 537dffc0f..b2129ca53 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -281,9 +281,9 @@ fn compute_formatted_options( attr_to_highlight: &HashSet, attr_to_crop: &[String], query_crop_length: usize, - to_retrieve_ids: &BTreeSet, + to_retrieve_ids: &BTreeSet, fields_ids_map: &FieldsIdsMap, - displayed_ids: &BTreeSet, + displayed_ids: &BTreeSet, ) -> BTreeMap { let mut formatted_options = BTreeMap::new(); @@ -314,7 +314,7 @@ fn add_highlight_to_formatted_options( formatted_options: &mut BTreeMap, attr_to_highlight: &HashSet, fields_ids_map: &FieldsIdsMap, - displayed_ids: &BTreeSet, + displayed_ids: &BTreeSet, ) { for attr in attr_to_highlight { let new_format = FormatOptions { @@ -342,7 +342,7 @@ fn add_crop_to_formatted_options( attr_to_crop: &[String], crop_length: usize, fields_ids_map: &FieldsIdsMap, - displayed_ids: &BTreeSet, + displayed_ids: &BTreeSet, ) { for attr in attr_to_crop { let mut split = attr.rsplitn(2, ':'); @@ -382,7 +382,7 @@ fn add_crop_to_formatted_options( fn add_non_formatted_ids_to_formatted_options( formatted_options: &mut BTreeMap, - to_retrieve_ids: &BTreeSet, + to_retrieve_ids: &BTreeSet, ) { for id in to_retrieve_ids { formatted_options.entry(*id).or_insert(FormatOptions { @@ -395,7 +395,7 @@ fn add_non_formatted_ids_to_formatted_options( fn make_document( attributes_to_retrieve: &BTreeSet, field_ids_map: &FieldsIdsMap, - obkv: obkv::KvReader, + obkv: obkv::KvReaderU16, ) -> Result { let mut document = Document::new(); @@ -418,7 +418,7 @@ fn make_document( fn format_fields>( field_ids_map: &FieldsIdsMap, - obkv: obkv::KvReader, + obkv: obkv::KvReaderU16, formatter: &Formatter, matching_words: &impl Matcher, formatted_options: &BTreeMap,