From 99211eb375fec29fabb1e55422d8924260388838 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 28 May 2024 11:46:53 +0200 Subject: [PATCH 01/41] Introduce the SettingDiff only_additional_fields method --- milli/src/update/settings.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 24b32b6fa..c3e4ab3fa 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1112,6 +1112,26 @@ impl InnerIndexSettingsDiff { || self.old.proximity_precision != self.new.proximity_precision } + /// Returns only the additional searchable fields if any + /// other searchable field has been modified, returns None. + pub fn only_additional_fields(&self) -> Option> { + match (&self.old.user_defined_searchable_fields, &self.new.user_defined_searchable_fields) { + (None, None) | (Some(_), None) => None, + (None, Some(new)) => Some(new.iter().cloned().collect()), + (Some(old), Some(new)) => { + let old: HashSet<_> = old.iter().cloned().collect(); + let new: HashSet<_> = new.iter().cloned().collect(); + if old.difference(&new).next().is_none() { + // if no field has been removed + // return only the additional ones + Some(&new - &old) + } else { + None + } + } + } + } + pub fn reindex_facets(&self) -> bool { let existing_fields = &self.new.existing_fields; if existing_fields.iter().any(|field| field.contains('.')) { From 9af103a88eb2df6467919e2b1cd9b27280d050e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 28 May 2024 14:53:45 +0200 Subject: [PATCH 02/41] Introducing a new into_del_add_obkv_conditional_operation function --- milli/src/update/del_add.rs | 15 ++++++ milli/src/update/index_documents/transform.rs | 47 +++++++++++++++---- 2 files changed, 53 insertions(+), 9 deletions(-) diff --git a/milli/src/update/del_add.rs b/milli/src/update/del_add.rs index 0288858ed..cb5e448f1 100644 --- a/milli/src/update/del_add.rs +++ b/milli/src/update/del_add.rs @@ -40,11 +40,26 @@ pub fn into_del_add_obkv( operation: DelAddOperation, buffer: &mut Vec, ) -> Result<(), std::io::Error> { + into_del_add_obkv_conditional_operation(reader, buffer, |_| operation) +} + +/// Akin to the [into_del_add_obkv] function but lets you +/// conditionally define the `DelAdd` variant based on the obkv key. 
+pub fn into_del_add_obkv_conditional_operation( + reader: obkv::KvReader, + buffer: &mut Vec, + operation: F, +) -> std::io::Result<()> +where + K: obkv::Key + PartialOrd, + F: Fn(K) -> DelAddOperation, +{ let mut writer = obkv::KvWriter::new(buffer); let mut value_buffer = Vec::new(); for (key, value) in reader.iter() { value_buffer.clear(); let mut value_writer = KvWriterDelAdd::new(&mut value_buffer); + let operation = operation(key); if matches!(operation, DelAddOperation::Deletion | DelAddOperation::DeletionAndAddition) { value_writer.insert(DelAdd::Deletion, value)?; } diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 41a0a55cf..dc6642b8a 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -20,7 +20,10 @@ use super::{IndexDocumentsMethod, IndexerConfig}; use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader}; use crate::error::{Error, InternalError, UserError}; use crate::index::{db_name, main_key}; -use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd}; +use crate::update::del_add::{ + into_del_add_obkv, into_del_add_obkv_conditional_operation, DelAdd, DelAddOperation, + KvReaderDelAdd, +}; use crate::update::index_documents::GrenadParameters; use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; @@ -841,13 +844,28 @@ impl<'a, 'i> Transform<'a, 'i> { let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) }; // If only the `searchableAttributes` has been changed, keep only the searchable fields. + // However, if only new searchable attributes are added, this function will + // return false has fields do not need to be reindexed. let must_reindex_searchables = settings_diff.reindex_searchable(); - let necessary_searchable_field = |id: FieldId| -> bool { - must_reindex_searchables + let must_index_only_additional_searchables = &settings_diff.only_additional_fields(); + let necessary_searchable_field_to_reindex = move |id: FieldId| -> bool { + must_index_only_additional_searchables.is_none() + && must_reindex_searchables && (settings_diff.old.searchable_fields_ids.contains(&id) || settings_diff.new.searchable_fields_ids.contains(&id)) }; + // If only new `searchableAttributes` are present, keep only those ones. + let additional_searchable_field_only = move |id: FieldId| -> bool { + match must_index_only_additional_searchables { + Some(additional_fields) => { + let additional_field = settings_diff.new.fields_ids_map.name(id).unwrap(); + additional_fields.contains(additional_field) + } + None => false, + } + }; + // If only a faceted field has been added, keep only this field. let must_reindex_facets = settings_diff.reindex_facets(); let necessary_faceted_field = |id: FieldId| -> bool { @@ -862,14 +880,21 @@ impl<'a, 'i> Transform<'a, 'i> { // we need the fields for the prompt/templating. let reindex_vectors = settings_diff.reindex_vectors(); + // The set of additional searchable fields only, + // the only purpose of these fields is to be indexed from scratch. 
+ let mut additional_searchables_only = HashSet::new(); + let mut obkv_writer = KvWriter::<_, FieldId>::memory(); for (id, val) in old_obkv.iter() { if is_primary_key(id) - || necessary_searchable_field(id) + || necessary_searchable_field_to_reindex(id) || necessary_faceted_field(id) || reindex_vectors { obkv_writer.insert(id, val)?; + } else if additional_searchable_field_only(id) { + additional_searchables_only.insert(id); + obkv_writer.insert(id, val)?; } } let data = obkv_writer.into_inner()?; @@ -887,11 +912,15 @@ impl<'a, 'i> Transform<'a, 'i> { let flattened = flattened.as_deref().map_or(obkv, KvReader::new); flattened_obkv_buffer.clear(); - into_del_add_obkv( - flattened, - DelAddOperation::DeletionAndAddition, - flattened_obkv_buffer, - )?; + into_del_add_obkv_conditional_operation(flattened, flattened_obkv_buffer, |id| { + // If the field is only required because it is an additional + // searchable field only define it as an DelAdd::Addition only. + if additional_searchables_only.contains(&id) { + DelAddOperation::Addition + } else { + DelAddOperation::DeletionAndAddition + } + })?; } Ok(()) From db3887929f717ee8c2712c838f49adc6ed90a6f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 29 May 2024 14:49:09 +0200 Subject: [PATCH 03/41] Fix an issue with settings diff and * in the searchable attributes --- milli/src/update/settings.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index c3e4ab3fa..af37a205c 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1112,12 +1112,11 @@ impl InnerIndexSettingsDiff { || self.old.proximity_precision != self.new.proximity_precision } - /// Returns only the additional searchable fields if any - /// other searchable field has been modified, returns None. + /// Returns only the additional searchable fields. + /// If any other searchable field has been modified, returns None. pub fn only_additional_fields(&self) -> Option> { match (&self.old.user_defined_searchable_fields, &self.new.user_defined_searchable_fields) { - (None, None) | (Some(_), None) => None, - (None, Some(new)) => Some(new.iter().cloned().collect()), + (None, None) | (Some(_), None) | (None, Some(_)) => None, // None means * (Some(old), Some(new)) => { let old: HashSet<_> = old.iter().cloned().collect(); let new: HashSet<_> = new.iter().cloned().collect(); From 4bf83f701c08272574f44f6a30daefaf261cbc50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 29 May 2024 14:50:00 +0200 Subject: [PATCH 04/41] Give the settings diff to the write_typed_chunk_into_index function --- milli/src/update/index_documents/mod.rs | 5 +++-- milli/src/update/index_documents/typed_chunk.rs | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index afae8973a..2420463b4 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -369,6 +369,7 @@ where // Run extraction pipeline in parallel. 
pool.install(|| { + let settings_diff_cloned = settings_diff.clone(); rayon::spawn(move || { let child_span = tracing::trace_span!(target: "indexing::details", parent: ¤t_span, "extract_and_send_grenad_chunks"); let _enter = child_span.enter(); @@ -398,7 +399,7 @@ where pool_params, lmdb_writer_sx.clone(), primary_key_id, - settings_diff.clone(), + settings_diff_cloned, max_positions_per_attributes, ) }); @@ -425,7 +426,7 @@ where Err(status) => { if let Some(typed_chunks) = chunk_accumulator.pop_longest() { let (docids, is_merged_database) = - write_typed_chunk_into_index(typed_chunks, self.index, self.wtxn)?; + write_typed_chunk_into_index(self.wtxn, self.index, &settings_diff, typed_chunks)?; if !docids.is_empty() { final_documents_ids |= docids; let documents_seen_count = final_documents_ids.len(); diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 2ef7a8990..fcd8cfc17 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -123,8 +123,10 @@ impl TypedChunk { #[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")] pub(crate) fn write_typed_chunk_into_index( typed_chunks: Vec, - index: &Index, wtxn: &mut RwTxn, + index: &Index, + settings_diff: &InnerIndexSettingsDiff, + typed_chunks: Vec, ) -> Result<(RoaringBitmap, bool)> { let mut is_merged_database = false; match typed_chunks[0] { From bc210bdc006046bd70940a453104bb6256ae43ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 29 May 2024 15:17:51 +0200 Subject: [PATCH 05/41] Introduce a dedicated function to write proximity entries in database --- .../src/update/index_documents/typed_chunk.rs | 79 ++++++++++++++++--- 1 file changed, 69 insertions(+), 10 deletions(-) diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index fcd8cfc17..4021dea08 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -7,7 +7,7 @@ use bytemuck::allocation::pod_collect_to_vec; use charabia::{Language, Script}; use grenad::{Merger, MergerBuilder}; use heed::types::Bytes; -use heed::RwTxn; +use heed::{BytesDecode, RwTxn}; use obkv::{KvReader, KvWriter}; use roaring::RoaringBitmap; @@ -20,13 +20,16 @@ use super::MergeFn; use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind}; use crate::facet::FacetType; use crate::index::db_name::DOCUMENTS; +use crate::proximity::MAX_DISTANCE; use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd}; use crate::update::facet::FacetsUpdate; use crate::update::index_documents::helpers::{ as_cloneable_grenad, keep_latest_obkv, try_split_array_at, }; +use crate::update::settings::InnerIndexSettingsDiff; use crate::{ - lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, InternalError, Result, SerializationError, + lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError, + Result, SerializationError, U8StrStrCodec, }; /// This struct accumulates and group the TypedChunks @@ -122,7 +125,6 @@ impl TypedChunk { /// Return new documents seen. 
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")] pub(crate) fn write_typed_chunk_into_index( - typed_chunks: Vec, wtxn: &mut RwTxn, index: &Index, settings_diff: &InnerIndexSettingsDiff, @@ -487,13 +489,22 @@ pub(crate) fn write_typed_chunk_into_index( } let merger = builder.build(); - write_entries_into_database( - merger, - &index.word_pair_proximity_docids, - wtxn, - deladd_serialize_add_side, - merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, - )?; + if settings_diff.only_additional_fields().is_some() { + write_proximity_entries_into_database_additional_searchables( + merger, + &index.word_pair_proximity_docids, + wtxn, + )?; + } else { + write_entries_into_database( + merger, + &index.word_pair_proximity_docids, + wtxn, + deladd_serialize_add_side, + merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, + )?; + } + is_merged_database = true; } TypedChunk::FieldIdDocidFacetNumbers(_) => { @@ -832,3 +843,51 @@ where } Ok(()) } + +/// Akin to the `write_entries_into_database` function but specialized +/// for the case when we only index additional searchable fields only. +#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")] +fn write_proximity_entries_into_database_additional_searchables( + merger: Merger, + database: &heed::Database, + wtxn: &mut RwTxn, +) -> Result<()> +where + R: io::Read + io::Seek, +{ + let mut iter = merger.into_stream_merger_iter()?; + while let Some((key, value)) = iter.next()? { + if valid_lmdb_key(key) { + let (proximity_to_insert, word1, word2) = + U8StrStrCodec::bytes_decode(key).map_err(heed::Error::Decoding)?; + let data_to_insert = match KvReaderDelAdd::new(value).get(DelAdd::Addition) { + Some(value) => { + CboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)? + } + None => continue, + }; + + let mut data_to_remove = RoaringBitmap::new(); + for prox in 1..(MAX_DISTANCE as u8) { + let key = (prox, word1, word2); + let database_value = database.get(wtxn, &key)?.unwrap_or_default(); + let value = if prox == proximity_to_insert { + // Proximity that should be changed. + // Union values and remove lower proximity data + (&database_value | &data_to_insert) - &data_to_remove + } else { + // Remove lower proximity data + &database_value - &data_to_remove + }; + + // add the current data in data_to_remove for the next proximities + data_to_remove |= &value; + + if database_value != value { + database.put(wtxn, &key, &value)?; + } + } + } + } + Ok(()) +} From d9e50741896869631206555e5727b0bd61824c9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 29 May 2024 17:46:28 +0200 Subject: [PATCH 06/41] Introduce a new way to determine the operations to perform on the fields --- milli/src/update/index_documents/transform.rs | 47 +++---------------- milli/src/update/settings.rs | 28 ++++++++++- 2 files changed, 34 insertions(+), 41 deletions(-) diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index dc6642b8a..34e40b7f6 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -843,29 +843,6 @@ impl<'a, 'i> Transform<'a, 'i> { // Always keep the primary key. let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) }; - // If only the `searchableAttributes` has been changed, keep only the searchable fields. 
- // However, if only new searchable attributes are added, this function will - // return false has fields do not need to be reindexed. - let must_reindex_searchables = settings_diff.reindex_searchable(); - let must_index_only_additional_searchables = &settings_diff.only_additional_fields(); - let necessary_searchable_field_to_reindex = move |id: FieldId| -> bool { - must_index_only_additional_searchables.is_none() - && must_reindex_searchables - && (settings_diff.old.searchable_fields_ids.contains(&id) - || settings_diff.new.searchable_fields_ids.contains(&id)) - }; - - // If only new `searchableAttributes` are present, keep only those ones. - let additional_searchable_field_only = move |id: FieldId| -> bool { - match must_index_only_additional_searchables { - Some(additional_fields) => { - let additional_field = settings_diff.new.fields_ids_map.name(id).unwrap(); - additional_fields.contains(additional_field) - } - None => false, - } - }; - // If only a faceted field has been added, keep only this field. let must_reindex_facets = settings_diff.reindex_facets(); let necessary_faceted_field = |id: FieldId| -> bool { @@ -880,20 +857,16 @@ impl<'a, 'i> Transform<'a, 'i> { // we need the fields for the prompt/templating. let reindex_vectors = settings_diff.reindex_vectors(); - // The set of additional searchable fields only, - // the only purpose of these fields is to be indexed from scratch. - let mut additional_searchables_only = HashSet::new(); + // The operations that we must perform on the different fields. + let mut operations = HashMap::new(); let mut obkv_writer = KvWriter::<_, FieldId>::memory(); for (id, val) in old_obkv.iter() { - if is_primary_key(id) - || necessary_searchable_field_to_reindex(id) - || necessary_faceted_field(id) - || reindex_vectors - { + if is_primary_key(id) || necessary_faceted_field(id) || reindex_vectors { + operations.insert(id, DelAddOperation::DeletionAndAddition); obkv_writer.insert(id, val)?; - } else if additional_searchable_field_only(id) { - additional_searchables_only.insert(id); + } else if let Some(operation) = settings_diff.reindex_searchable_id(id) { + operations.insert(id, operation); obkv_writer.insert(id, val)?; } } @@ -913,13 +886,7 @@ impl<'a, 'i> Transform<'a, 'i> { flattened_obkv_buffer.clear(); into_del_add_obkv_conditional_operation(flattened, flattened_obkv_buffer, |id| { - // If the field is only required because it is an additional - // searchable field only define it as an DelAdd::Addition only. 
- if additional_searchables_only.contains(&id) { - DelAddOperation::Addition - } else { - DelAddOperation::DeletionAndAddition - } + operations.get(&id).copied().unwrap_or(DelAddOperation::DeletionAndAddition) })?; } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index af37a205c..b401adff9 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -9,6 +9,7 @@ use itertools::{EitherOrBoth, Itertools}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use time::OffsetDateTime; +use super::del_add::DelAddOperation; use super::index_documents::{IndexDocumentsConfig, Transform}; use super::IndexerConfig; use crate::criterion::Criterion; @@ -1112,6 +1113,31 @@ impl InnerIndexSettingsDiff { || self.old.proximity_precision != self.new.proximity_precision } + pub fn reindex_searchable_id(&self, id: FieldId) -> Option { + if self.old.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) + != self.new.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) + || self.old.allowed_separators != self.new.allowed_separators + || self.old.dictionary != self.new.dictionary + || self.old.exact_attributes != self.new.exact_attributes + // Here we can be much more optimal by just deleting the proximity database + || self.old.proximity_precision != self.new.proximity_precision + { + Some(DelAddOperation::DeletionAndAddition) + } else if let Some(only_additional_fields) = self.only_additional_fields() { + let additional_field = self.new.fields_ids_map.name(id).unwrap(); + if only_additional_fields.contains(additional_field) { + Some(DelAddOperation::Addition) + } else { + None + } + } else if self.old.user_defined_searchable_fields != self.new.user_defined_searchable_fields + { + Some(DelAddOperation::DeletionAndAddition) + } else { + None + } + } + /// Returns only the additional searchable fields. /// If any other searchable field has been modified, returns None. pub fn only_additional_fields(&self) -> Option> { @@ -1599,7 +1625,7 @@ mod tests { // When we search for something that is not in // the searchable fields it must not return any document. let result = index.search(&rtxn).query("23").execute().unwrap(); - assert!(result.documents_ids.is_empty()); + assert_eq!(result.documents_ids, Vec::::new()); // When we search for something that is in the searchable fields // we must find the appropriate document. 
From 3a78e988dad814f0b781e568d349ecceb20be0a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 30 May 2024 11:17:03 +0200 Subject: [PATCH 07/41] Reduce the number of complex calls to settings diff functions --- milli/src/update/index_documents/transform.rs | 14 ++-- .../src/update/index_documents/typed_chunk.rs | 2 +- milli/src/update/settings.rs | 69 ++++++++++++------- 3 files changed, 53 insertions(+), 32 deletions(-) diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 34e40b7f6..59bab36e8 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -808,13 +808,15 @@ impl<'a, 'i> Transform<'a, 'i> { let mut new_inner_settings = old_inner_settings.clone(); new_inner_settings.fields_ids_map = fields_ids_map; - let settings_diff = InnerIndexSettingsDiff { - old: old_inner_settings, - new: new_inner_settings, + let embedding_configs_updated = false; + let settings_update_only = false; + let settings_diff = InnerIndexSettingsDiff::new( + old_inner_settings, + new_inner_settings, primary_key_id, - embedding_configs_updated: false, - settings_update_only: false, - }; + embedding_configs_updated, + settings_update_only, + ); Ok(TransformOutput { primary_key, diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 4021dea08..2fbe91685 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -489,7 +489,7 @@ pub(crate) fn write_typed_chunk_into_index( } let merger = builder.build(); - if settings_diff.only_additional_fields().is_some() { + if settings_diff.only_additional_fields.is_some() { write_proximity_entries_into_database_additional_searchables( merger, &index.word_pair_proximity_docids, diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index b401adff9..84eccf8e6 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1073,13 +1073,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { .index .primary_key(self.wtxn)? .and_then(|name| new_inner_settings.fields_ids_map.id(name)); - let inner_settings_diff = InnerIndexSettingsDiff { - old: old_inner_settings, - new: new_inner_settings, + let settings_update_only = true; + let inner_settings_diff = InnerIndexSettingsDiff::new( + old_inner_settings, + new_inner_settings, primary_key_id, embedding_configs_updated, - settings_update_only: true, - }; + settings_update_only, + ); if inner_settings_diff.any_reindexing_needed() { self.reindex(&progress_callback, &should_abort, inner_settings_diff)?; @@ -1096,9 +1097,46 @@ pub struct InnerIndexSettingsDiff { // TODO: compare directly the embedders. pub(crate) embedding_configs_updated: bool, pub(crate) settings_update_only: bool, + /// The set of only the additional searchable fields. + /// If any other searchable field has been modified, is set to None. 
+ pub(crate) only_additional_fields: Option>, } impl InnerIndexSettingsDiff { + pub(crate) fn new( + old_settings: InnerIndexSettings, + new_settings: InnerIndexSettings, + primary_key_id: Option, + embedding_configs_updated: bool, + settings_update_only: bool, + ) -> Self { + let only_additional_fields = match ( + &old_settings.user_defined_searchable_fields, + &new_settings.user_defined_searchable_fields, + ) { + (None, None) | (Some(_), None) | (None, Some(_)) => None, // None means * + (Some(old), Some(new)) => { + let old: HashSet<_> = old.iter().cloned().collect(); + let new: HashSet<_> = new.iter().cloned().collect(); + if old.difference(&new).next().is_none() { + // if no field has been removed return only the additional ones + Some(&new - &old) + } else { + None + } + } + }; + + InnerIndexSettingsDiff { + old: old_settings, + new: new_settings, + primary_key_id, + embedding_configs_updated, + settings_update_only, + only_additional_fields, + } + } + pub fn any_reindexing_needed(&self) -> bool { self.reindex_searchable() || self.reindex_facets() || self.reindex_vectors() } @@ -1123,7 +1161,7 @@ impl InnerIndexSettingsDiff { || self.old.proximity_precision != self.new.proximity_precision { Some(DelAddOperation::DeletionAndAddition) - } else if let Some(only_additional_fields) = self.only_additional_fields() { + } else if let Some(only_additional_fields) = &self.only_additional_fields { let additional_field = self.new.fields_ids_map.name(id).unwrap(); if only_additional_fields.contains(additional_field) { Some(DelAddOperation::Addition) @@ -1138,25 +1176,6 @@ impl InnerIndexSettingsDiff { } } - /// Returns only the additional searchable fields. - /// If any other searchable field has been modified, returns None. - pub fn only_additional_fields(&self) -> Option> { - match (&self.old.user_defined_searchable_fields, &self.new.user_defined_searchable_fields) { - (None, None) | (Some(_), None) | (None, Some(_)) => None, // None means * - (Some(old), Some(new)) => { - let old: HashSet<_> = old.iter().cloned().collect(); - let new: HashSet<_> = new.iter().cloned().collect(); - if old.difference(&new).next().is_none() { - // if no field has been removed - // return only the additional ones - Some(&new - &old) - } else { - None - } - } - } - } - pub fn reindex_facets(&self) -> bool { let existing_fields = &self.new.existing_fields; if existing_fields.iter().any(|field| field.contains('.')) { From 0e9eb9eedb04066394dc9e9b5829ed9aaa05339a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 30 May 2024 12:08:27 +0200 Subject: [PATCH 08/41] Add a span for the settings diff creation --- milli/src/update/settings.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 84eccf8e6..eca250c19 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1103,6 +1103,7 @@ pub struct InnerIndexSettingsDiff { } impl InnerIndexSettingsDiff { + #[tracing::instrument(level = "trace", skip_all, target = "indexing::settings")] pub(crate) fn new( old_settings: InnerIndexSettings, new_settings: InnerIndexSettings, From 75496af985d02fcf1259b7defd6b6d47a0b44864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 30 May 2024 12:14:22 +0200 Subject: [PATCH 09/41] Add a span for the prepare_for_documents_reindexing --- milli/src/update/index_documents/transform.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/milli/src/update/index_documents/transform.rs 
b/milli/src/update/index_documents/transform.rs index 59bab36e8..c34b7876a 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -899,6 +899,11 @@ impl<'a, 'i> Transform<'a, 'i> { /// of the index with the attributes reordered accordingly to the `FieldsIdsMap` given as argument. /// // TODO this can be done in parallel by using the rayon `ThreadPool`. + #[tracing::instrument( + level = "trace" + skip(self, wtxn, settings_diff), + target = "indexing::documents" + )] pub fn prepare_for_documents_reindexing( self, wtxn: &mut heed::RwTxn<'i>, From b9a0ff0dd6c282110bb8f331870ddb77e0ace530 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 30 May 2024 15:39:05 +0200 Subject: [PATCH 10/41] Cache a lot of operations to know if a field must be indexed --- milli/src/update/settings.rs | 40 +++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index eca250c19..11a249068 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1100,6 +1100,12 @@ pub struct InnerIndexSettingsDiff { /// The set of only the additional searchable fields. /// If any other searchable field has been modified, is set to None. pub(crate) only_additional_fields: Option>, + + // Cache the check to see if all the stop_words, allowed_separators, dictionary, + // exact_attributes, proximity_precision are different. + pub(crate) cache_reindex_searchable_without_user_defined: bool, + // Cache the check to see if all the user_defined_searchables are different. + pub(crate) cache_user_defined_searchables: bool, } impl InnerIndexSettingsDiff { @@ -1128,6 +1134,18 @@ impl InnerIndexSettingsDiff { } }; + let cache_reindex_searchable_without_user_defined = { + old_settings.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) + != new_settings.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) + || old_settings.allowed_separators != new_settings.allowed_separators + || old_settings.dictionary != new_settings.dictionary + || old_settings.exact_attributes != new_settings.exact_attributes + || old_settings.proximity_precision != new_settings.proximity_precision + }; + + let cache_user_defined_searchables = old_settings.user_defined_searchable_fields + != new_settings.user_defined_searchable_fields; + InnerIndexSettingsDiff { old: old_settings, new: new_settings, @@ -1135,6 +1153,8 @@ impl InnerIndexSettingsDiff { embedding_configs_updated, settings_update_only, only_additional_fields, + cache_reindex_searchable_without_user_defined, + cache_user_defined_searchables, } } @@ -1143,24 +1163,11 @@ impl InnerIndexSettingsDiff { } pub fn reindex_searchable(&self) -> bool { - self.old.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) - != self.new.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) - || self.old.allowed_separators != self.new.allowed_separators - || self.old.dictionary != self.new.dictionary - || self.old.user_defined_searchable_fields != self.new.user_defined_searchable_fields - || self.old.exact_attributes != self.new.exact_attributes - || self.old.proximity_precision != self.new.proximity_precision + self.cache_reindex_searchable_without_user_defined || self.cache_user_defined_searchables } pub fn reindex_searchable_id(&self, id: FieldId) -> Option { - if self.old.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) - != self.new.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) - || 
self.old.allowed_separators != self.new.allowed_separators - || self.old.dictionary != self.new.dictionary - || self.old.exact_attributes != self.new.exact_attributes - // Here we can be much more optimal by just deleting the proximity database - || self.old.proximity_precision != self.new.proximity_precision - { + if self.cache_reindex_searchable_without_user_defined { Some(DelAddOperation::DeletionAndAddition) } else if let Some(only_additional_fields) = &self.only_additional_fields { let additional_field = self.new.fields_ids_map.name(id).unwrap(); @@ -1169,8 +1176,7 @@ impl InnerIndexSettingsDiff { } else { None } - } else if self.old.user_defined_searchable_fields != self.new.user_defined_searchable_fields - { + } else if self.cache_user_defined_searchables { Some(DelAddOperation::DeletionAndAddition) } else { None From c32d746069352b73904018d347689d1b62bf004b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 30 May 2024 16:46:57 +0200 Subject: [PATCH 11/41] Rename the embeddings workloads --- ...subset-hf-embeddings.json => embeddings-movies-subset-hf.json} | 0 ...{settings-add-embeddings.json => embeddings-settings-add.json} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename workloads/{movies-subset-hf-embeddings.json => embeddings-movies-subset-hf.json} (100%) rename workloads/{settings-add-embeddings.json => embeddings-settings-add.json} (100%) diff --git a/workloads/movies-subset-hf-embeddings.json b/workloads/embeddings-movies-subset-hf.json similarity index 100% rename from workloads/movies-subset-hf-embeddings.json rename to workloads/embeddings-movies-subset-hf.json diff --git a/workloads/settings-add-embeddings.json b/workloads/embeddings-settings-add.json similarity index 100% rename from workloads/settings-add-embeddings.json rename to workloads/embeddings-settings-add.json From 17c5ceeb9dc2f6574beed680b3ade879ab88ff7f Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 3 Jun 2024 11:42:57 +0200 Subject: [PATCH 12/41] iterate over the faceted fields instead of over the whole document --- .../extract/extract_fid_docid_facet_values.rs | 64 +++++++++++++------ 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index 3cbd7e49e..bcbb87a58 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -1,5 +1,5 @@ use std::borrow::Cow; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, BTreeSet}; use std::convert::TryInto; use std::fs::File; use std::io::{self, BufReader}; @@ -9,7 +9,7 @@ use std::result::Result as StdResult; use bytemuck::bytes_of; use grenad::Sorter; use heed::BytesEncode; -use itertools::EitherOrBoth; +use itertools::{merge_join_by, EitherOrBoth}; use ordered_float::OrderedFloat; use roaring::RoaringBitmap; use serde_json::{from_slice, Value}; @@ -18,7 +18,7 @@ use FilterableValues::{Empty, Null, Values}; use super::helpers::{create_sorter, keep_first, sorter_into_reader, GrenadParameters}; use crate::error::InternalError; use crate::facet::value_encoding::f64_into_bytes; -use crate::update::del_add::{DelAdd, KvWriterDelAdd}; +use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::index_documents::{create_writer, writer_into_reader}; use crate::update::settings::InnerIndexSettingsDiff; use 
crate::{CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, MAX_FACET_VALUE_LENGTH}; @@ -66,6 +66,11 @@ pub fn extract_fid_docid_facet_values( max_memory.map(|m| m / 2), ); + let old_faceted_fids: BTreeSet<_> = + settings_diff.old.faceted_fields_ids.iter().copied().collect(); + let new_faceted_fids: BTreeSet<_> = + settings_diff.new.faceted_fields_ids.iter().copied().collect(); + // The tuples represents the Del and Add side for a bitmap let mut facet_exists_docids = BTreeMap::::new(); let mut facet_is_null_docids = BTreeMap::::new(); @@ -78,11 +83,45 @@ pub fn extract_fid_docid_facet_values( let mut cursor = obkv_documents.into_cursor()?; while let Some((docid_bytes, value)) = cursor.move_on_next()? { let obkv = obkv::KvReader::new(value); + let get_document_json_value = move |field_id, side| { + obkv.get(field_id) + .map(KvReaderDelAdd::new) + .and_then(|kv| kv.get(side)) + .map(from_slice) + .transpose() + .map_err(InternalError::SerdeJson) + }; + // iterate over the faceted fields instead of over the whole document. + for eob in + merge_join_by(old_faceted_fids.iter(), new_faceted_fids.iter(), |old, new| old.cmp(new)) + { + let (field_id, del_value, add_value) = match eob { + EitherOrBoth::Left(&field_id) => { + let del_value = get_document_json_value(field_id, DelAdd::Deletion)?; - for (field_id, field_bytes) in obkv.iter() { - let delete_faceted = settings_diff.old.faceted_fields_ids.contains(&field_id); - let add_faceted = settings_diff.new.faceted_fields_ids.contains(&field_id); - if delete_faceted || add_faceted { + // deletion only + (field_id, del_value, None) + } + EitherOrBoth::Right(&field_id) => { + let add_value = get_document_json_value(field_id, DelAdd::Addition)?; + + // addition only + (field_id, None, add_value) + } + EitherOrBoth::Both(&field_id, _) => { + // during settings update, recompute the changing settings only. + if settings_diff.settings_update_only { + continue; + } + + let del_value = get_document_json_value(field_id, DelAdd::Deletion)?; + let add_value = get_document_json_value(field_id, DelAdd::Addition)?; + + (field_id, del_value, add_value) + } + }; + + if del_value.is_some() || add_value.is_some() { numbers_key_buffer.clear(); strings_key_buffer.clear(); @@ -98,17 +137,6 @@ pub fn extract_fid_docid_facet_values( numbers_key_buffer.extend_from_slice(docid_bytes); strings_key_buffer.extend_from_slice(docid_bytes); - let del_add_obkv = obkv::KvReader::new(field_bytes); - let del_value = match del_add_obkv.get(DelAdd::Deletion).filter(|_| delete_faceted) - { - Some(bytes) => Some(from_slice(bytes).map_err(InternalError::SerdeJson)?), - None => None, - }; - let add_value = match del_add_obkv.get(DelAdd::Addition).filter(|_| add_faceted) { - Some(bytes) => Some(from_slice(bytes).map_err(InternalError::SerdeJson)?), - None => None, - }; - // We insert the document id on the Del and the Add side if the field exists. 
let (ref mut del_exists, ref mut add_exists) = facet_exists_docids.entry(field_id).or_default(); From d29d4f88da692f3345830d0236e0782f01b9f7ca Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 4 Jun 2024 15:31:24 +0200 Subject: [PATCH 13/41] Skip iterating over documents when the faceted field list doesn't change --- .../extract/extract_fid_docid_facet_values.rs | 318 +++++++++--------- 1 file changed, 161 insertions(+), 157 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index bcbb87a58..810fa26a9 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -66,11 +66,6 @@ pub fn extract_fid_docid_facet_values( max_memory.map(|m| m / 2), ); - let old_faceted_fids: BTreeSet<_> = - settings_diff.old.faceted_fields_ids.iter().copied().collect(); - let new_faceted_fids: BTreeSet<_> = - settings_diff.new.faceted_fields_ids.iter().copied().collect(); - // The tuples represents the Del and Add side for a bitmap let mut facet_exists_docids = BTreeMap::::new(); let mut facet_is_null_docids = BTreeMap::::new(); @@ -80,172 +75,181 @@ pub fn extract_fid_docid_facet_values( let mut numbers_key_buffer = Vec::new(); let mut strings_key_buffer = Vec::new(); - let mut cursor = obkv_documents.into_cursor()?; - while let Some((docid_bytes, value)) = cursor.move_on_next()? { - let obkv = obkv::KvReader::new(value); - let get_document_json_value = move |field_id, side| { - obkv.get(field_id) - .map(KvReaderDelAdd::new) - .and_then(|kv| kv.get(side)) - .map(from_slice) - .transpose() - .map_err(InternalError::SerdeJson) - }; - // iterate over the faceted fields instead of over the whole document. - for eob in - merge_join_by(old_faceted_fids.iter(), new_faceted_fids.iter(), |old, new| old.cmp(new)) - { - let (field_id, del_value, add_value) = match eob { - EitherOrBoth::Left(&field_id) => { - let del_value = get_document_json_value(field_id, DelAdd::Deletion)?; + let old_faceted_fids: BTreeSet<_> = + settings_diff.old.faceted_fields_ids.iter().copied().collect(); + let new_faceted_fids: BTreeSet<_> = + settings_diff.new.faceted_fields_ids.iter().copied().collect(); - // deletion only - (field_id, del_value, None) - } - EitherOrBoth::Right(&field_id) => { - let add_value = get_document_json_value(field_id, DelAdd::Addition)?; + if !settings_diff.settings_update_only || old_faceted_fids != new_faceted_fids { + let mut cursor = obkv_documents.into_cursor()?; + while let Some((docid_bytes, value)) = cursor.move_on_next()? { + let obkv = obkv::KvReader::new(value); + let get_document_json_value = move |field_id, side| { + obkv.get(field_id) + .map(KvReaderDelAdd::new) + .and_then(|kv| kv.get(side)) + .map(from_slice) + .transpose() + .map_err(InternalError::SerdeJson) + }; + // iterate over the faceted fields instead of over the whole document. + for eob in + merge_join_by(old_faceted_fids.iter(), new_faceted_fids.iter(), |old, new| { + old.cmp(new) + }) + { + let (field_id, del_value, add_value) = match eob { + EitherOrBoth::Left(&field_id) => { + let del_value = get_document_json_value(field_id, DelAdd::Deletion)?; - // addition only - (field_id, None, add_value) - } - EitherOrBoth::Both(&field_id, _) => { - // during settings update, recompute the changing settings only. 
- if settings_diff.settings_update_only { - continue; + // deletion only + (field_id, del_value, None) + } + EitherOrBoth::Right(&field_id) => { + let add_value = get_document_json_value(field_id, DelAdd::Addition)?; + + // addition only + (field_id, None, add_value) + } + EitherOrBoth::Both(&field_id, _) => { + // during settings update, recompute the changing settings only. + if settings_diff.settings_update_only { + continue; + } + + let del_value = get_document_json_value(field_id, DelAdd::Deletion)?; + let add_value = get_document_json_value(field_id, DelAdd::Addition)?; + + (field_id, del_value, add_value) + } + }; + + if del_value.is_some() || add_value.is_some() { + numbers_key_buffer.clear(); + strings_key_buffer.clear(); + + // Set key to the field_id + // Note: this encoding is consistent with FieldIdCodec + numbers_key_buffer.extend_from_slice(&field_id.to_be_bytes()); + strings_key_buffer.extend_from_slice(&field_id.to_be_bytes()); + + let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap(); + let document = DocumentId::from_be_bytes(document); + + // For the other extraction tasks, prefix the key with the field_id and the document_id + numbers_key_buffer.extend_from_slice(docid_bytes); + strings_key_buffer.extend_from_slice(docid_bytes); + + // We insert the document id on the Del and the Add side if the field exists. + let (ref mut del_exists, ref mut add_exists) = + facet_exists_docids.entry(field_id).or_default(); + let (ref mut del_is_null, ref mut add_is_null) = + facet_is_null_docids.entry(field_id).or_default(); + let (ref mut del_is_empty, ref mut add_is_empty) = + facet_is_empty_docids.entry(field_id).or_default(); + + if del_value.is_some() { + del_exists.insert(document); + } + if add_value.is_some() { + add_exists.insert(document); } - let del_value = get_document_json_value(field_id, DelAdd::Deletion)?; - let add_value = get_document_json_value(field_id, DelAdd::Addition)?; + let del_geo_support = settings_diff + .old + .geo_fields_ids + .map_or(false, |(lat, lng)| field_id == lat || field_id == lng); + let add_geo_support = settings_diff + .new + .geo_fields_ids + .map_or(false, |(lat, lng)| field_id == lat || field_id == lng); + let del_filterable_values = + del_value.map(|value| extract_facet_values(&value, del_geo_support)); + let add_filterable_values = + add_value.map(|value| extract_facet_values(&value, add_geo_support)); - (field_id, del_value, add_value) - } - }; + // Those closures are just here to simplify things a bit. 
+ let mut insert_numbers_diff = |del_numbers, add_numbers| { + insert_numbers_diff( + &mut fid_docid_facet_numbers_sorter, + &mut numbers_key_buffer, + del_numbers, + add_numbers, + ) + }; + let mut insert_strings_diff = |del_strings, add_strings| { + insert_strings_diff( + &mut fid_docid_facet_strings_sorter, + &mut strings_key_buffer, + del_strings, + add_strings, + ) + }; - if del_value.is_some() || add_value.is_some() { - numbers_key_buffer.clear(); - strings_key_buffer.clear(); - - // Set key to the field_id - // Note: this encoding is consistent with FieldIdCodec - numbers_key_buffer.extend_from_slice(&field_id.to_be_bytes()); - strings_key_buffer.extend_from_slice(&field_id.to_be_bytes()); - - let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap(); - let document = DocumentId::from_be_bytes(document); - - // For the other extraction tasks, prefix the key with the field_id and the document_id - numbers_key_buffer.extend_from_slice(docid_bytes); - strings_key_buffer.extend_from_slice(docid_bytes); - - // We insert the document id on the Del and the Add side if the field exists. - let (ref mut del_exists, ref mut add_exists) = - facet_exists_docids.entry(field_id).or_default(); - let (ref mut del_is_null, ref mut add_is_null) = - facet_is_null_docids.entry(field_id).or_default(); - let (ref mut del_is_empty, ref mut add_is_empty) = - facet_is_empty_docids.entry(field_id).or_default(); - - if del_value.is_some() { - del_exists.insert(document); - } - if add_value.is_some() { - add_exists.insert(document); - } - - let del_geo_support = settings_diff - .old - .geo_fields_ids - .map_or(false, |(lat, lng)| field_id == lat || field_id == lng); - let add_geo_support = settings_diff - .new - .geo_fields_ids - .map_or(false, |(lat, lng)| field_id == lat || field_id == lng); - let del_filterable_values = - del_value.map(|value| extract_facet_values(&value, del_geo_support)); - let add_filterable_values = - add_value.map(|value| extract_facet_values(&value, add_geo_support)); - - // Those closures are just here to simplify things a bit. 
- let mut insert_numbers_diff = |del_numbers, add_numbers| { - insert_numbers_diff( - &mut fid_docid_facet_numbers_sorter, - &mut numbers_key_buffer, - del_numbers, - add_numbers, - ) - }; - let mut insert_strings_diff = |del_strings, add_strings| { - insert_strings_diff( - &mut fid_docid_facet_strings_sorter, - &mut strings_key_buffer, - del_strings, - add_strings, - ) - }; - - match (del_filterable_values, add_filterable_values) { - (None, None) => (), - (Some(del_filterable_values), None) => match del_filterable_values { - Null => { - del_is_null.insert(document); - } - Empty => { - del_is_empty.insert(document); - } - Values { numbers, strings } => { - insert_numbers_diff(numbers, vec![])?; - insert_strings_diff(strings, vec![])?; - } - }, - (None, Some(add_filterable_values)) => match add_filterable_values { - Null => { - add_is_null.insert(document); - } - Empty => { - add_is_empty.insert(document); - } - Values { numbers, strings } => { - insert_numbers_diff(vec![], numbers)?; - insert_strings_diff(vec![], strings)?; - } - }, - (Some(del_filterable_values), Some(add_filterable_values)) => { - match (del_filterable_values, add_filterable_values) { - (Null, Null) | (Empty, Empty) => (), - (Null, Empty) => { - del_is_null.insert(document); - add_is_empty.insert(document); - } - (Empty, Null) => { - del_is_empty.insert(document); - add_is_null.insert(document); - } - (Null, Values { numbers, strings }) => { - insert_numbers_diff(vec![], numbers)?; - insert_strings_diff(vec![], strings)?; + match (del_filterable_values, add_filterable_values) { + (None, None) => (), + (Some(del_filterable_values), None) => match del_filterable_values { + Null => { del_is_null.insert(document); } - (Empty, Values { numbers, strings }) => { - insert_numbers_diff(vec![], numbers)?; - insert_strings_diff(vec![], strings)?; + Empty => { del_is_empty.insert(document); } - (Values { numbers, strings }, Null) => { - add_is_null.insert(document); + Values { numbers, strings } => { insert_numbers_diff(numbers, vec![])?; insert_strings_diff(strings, vec![])?; } - (Values { numbers, strings }, Empty) => { - add_is_empty.insert(document); - insert_numbers_diff(numbers, vec![])?; - insert_strings_diff(strings, vec![])?; + }, + (None, Some(add_filterable_values)) => match add_filterable_values { + Null => { + add_is_null.insert(document); } - ( - Values { numbers: del_numbers, strings: del_strings }, - Values { numbers: add_numbers, strings: add_strings }, - ) => { - insert_numbers_diff(del_numbers, add_numbers)?; - insert_strings_diff(del_strings, add_strings)?; + Empty => { + add_is_empty.insert(document); + } + Values { numbers, strings } => { + insert_numbers_diff(vec![], numbers)?; + insert_strings_diff(vec![], strings)?; + } + }, + (Some(del_filterable_values), Some(add_filterable_values)) => { + match (del_filterable_values, add_filterable_values) { + (Null, Null) | (Empty, Empty) => (), + (Null, Empty) => { + del_is_null.insert(document); + add_is_empty.insert(document); + } + (Empty, Null) => { + del_is_empty.insert(document); + add_is_null.insert(document); + } + (Null, Values { numbers, strings }) => { + insert_numbers_diff(vec![], numbers)?; + insert_strings_diff(vec![], strings)?; + del_is_null.insert(document); + } + (Empty, Values { numbers, strings }) => { + insert_numbers_diff(vec![], numbers)?; + insert_strings_diff(vec![], strings)?; + del_is_empty.insert(document); + } + (Values { numbers, strings }, Null) => { + add_is_null.insert(document); + insert_numbers_diff(numbers, vec![])?; + 
insert_strings_diff(strings, vec![])?; + } + (Values { numbers, strings }, Empty) => { + add_is_empty.insert(document); + insert_numbers_diff(numbers, vec![])?; + insert_strings_diff(strings, vec![])?; + } + ( + Values { numbers: del_numbers, strings: del_strings }, + Values { numbers: add_numbers, strings: add_strings }, + ) => { + insert_numbers_diff(del_numbers, add_numbers)?; + insert_strings_diff(del_strings, add_strings)?; + } } } } From ba9fadc8f19d9ce32e1e7469f5dd154875fe2a04 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 5 Jun 2024 10:51:16 +0200 Subject: [PATCH 14/41] Put only_additional_fields to None if the difference gives an empty result. --- milli/src/update/settings.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 11a249068..952b017c6 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1127,7 +1127,7 @@ impl InnerIndexSettingsDiff { let new: HashSet<_> = new.iter().cloned().collect(); if old.difference(&new).next().is_none() { // if no field has been removed return only the additional ones - Some(&new - &old) + Some(&new - &old).filter(|x| !x.is_empty()) } else { None } From ff87b4db261f10aeb4a0375d5766dd4ffd085341 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 5 Jun 2024 12:48:44 +0200 Subject: [PATCH 15/41] Avoid running proximity when only the exact attributes changes --- .../extract_word_pair_proximity_docids.rs | 9 ++++---- milli/src/update/settings.rs | 22 +++++++++++++++---- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs index 617338f9f..5a9363942 100644 --- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs @@ -26,11 +26,8 @@ pub fn extract_word_pair_proximity_docids( indexer: GrenadParameters, settings_diff: &InnerIndexSettingsDiff, ) -> Result>> { - let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord; - let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord; - // early return if the data shouldn't be deleted nor created. - if !any_deletion && !any_addition { + if settings_diff.settings_update_only && !settings_diff.reindex_proximities() { let writer = create_writer( indexer.chunk_compression_type, indexer.chunk_compression_level, @@ -39,8 +36,10 @@ pub fn extract_word_pair_proximity_docids( return writer_into_reader(writer); } - let max_memory = indexer.max_memory_by_thread(); + let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord; + let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord; + let max_memory = indexer.max_memory_by_thread(); let mut word_pair_proximity_docids_sorters: Vec<_> = (1..MAX_DISTANCE) .map(|_| { create_sorter( diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 952b017c6..dc26ac746 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1104,8 +1104,10 @@ pub struct InnerIndexSettingsDiff { // Cache the check to see if all the stop_words, allowed_separators, dictionary, // exact_attributes, proximity_precision are different. 
pub(crate) cache_reindex_searchable_without_user_defined: bool, - // Cache the check to see if all the user_defined_searchables are different. + // Cache the check to see if the user_defined_searchables are different. pub(crate) cache_user_defined_searchables: bool, + // Cache the check to see if the exact_attributes are different. + pub(crate) cache_exact_attributes: bool, } impl InnerIndexSettingsDiff { @@ -1139,10 +1141,11 @@ impl InnerIndexSettingsDiff { != new_settings.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) || old_settings.allowed_separators != new_settings.allowed_separators || old_settings.dictionary != new_settings.dictionary - || old_settings.exact_attributes != new_settings.exact_attributes || old_settings.proximity_precision != new_settings.proximity_precision }; + let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes; + let cache_user_defined_searchables = old_settings.user_defined_searchable_fields != new_settings.user_defined_searchable_fields; @@ -1155,6 +1158,7 @@ impl InnerIndexSettingsDiff { only_additional_fields, cache_reindex_searchable_without_user_defined, cache_user_defined_searchables, + cache_exact_attributes, } } @@ -1163,11 +1167,21 @@ impl InnerIndexSettingsDiff { } pub fn reindex_searchable(&self) -> bool { - self.cache_reindex_searchable_without_user_defined || self.cache_user_defined_searchables + self.cache_reindex_searchable_without_user_defined + || self.cache_exact_attributes + || self.cache_user_defined_searchables + } + + pub fn reindex_proximities(&self) -> bool { + // if any searchable settings force the reindexing + (self.cache_reindex_searchable_without_user_defined || self.cache_user_defined_searchables) + // and if any settings needs the proximity database created + && (self.old.proximity_precision == ProximityPrecision::ByAttribute + || self.old.proximity_precision == ProximityPrecision::ByAttribute) } pub fn reindex_searchable_id(&self, id: FieldId) -> Option { - if self.cache_reindex_searchable_without_user_defined { + if self.cache_reindex_searchable_without_user_defined || self.cache_exact_attributes { Some(DelAddOperation::DeletionAndAddition) } else if let Some(only_additional_fields) = &self.only_additional_fields { let additional_field = self.new.fields_ids_map.name(id).unwrap(); From 33241a6b12566b784a56d158240972dfad05f079 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 5 Jun 2024 16:00:24 +0200 Subject: [PATCH 16/41] Fix condition mistake --- milli/src/update/settings.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index dc26ac746..be9b6b74e 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1177,7 +1177,7 @@ impl InnerIndexSettingsDiff { (self.cache_reindex_searchable_without_user_defined || self.cache_user_defined_searchables) // and if any settings needs the proximity database created && (self.old.proximity_precision == ProximityPrecision::ByAttribute - || self.old.proximity_precision == ProximityPrecision::ByAttribute) + || self.new.proximity_precision == ProximityPrecision::ByAttribute) } pub fn reindex_searchable_id(&self, id: FieldId) -> Option { From 8b450b84f8e12d34b35ed32d54451db3ba325d49 Mon Sep 17 00:00:00 2001 From: Strift Date: Mon, 10 Jun 2024 17:45:14 +0200 Subject: [PATCH 17/41] Add june 11th webinar banner --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 540a2c92b..639f59545 100644 --- a/README.md +++ 
b/README.md
@@ -25,6 +25,12 @@
⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍
+---
+
+### 🔥 On June 11th, we are hosting a live product demo and Q&A about AI search, query suggestions, and v1.9. [Register now](https://us06web.zoom.us/meeting/register/tZMudO-qpz8qGdDQ_MNn3NmGV0m8WLf1uECM?utm_campaign=ossutm_source=github&utm_medium=meilisearch#/registration)!
+
+---
+
 [Meilisearch](https://www.meilisearch.com) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.
From b347b66619e23a3e194805cdd37f0b34e4b1c91b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mentine?=
Date: Tue, 18 Jun 2024 18:45:50 +0200
Subject: [PATCH 18/41] Revert "Add june 11th webinar banner" (#4705)

---
 README.md | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/README.md b/README.md
index 639f59545..540a2c92b 100644
--- a/README.md
+++ b/README.md
@@ -25,12 +25,6 @@
⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍
----
-
-### 🔥 On June 11th, we are hosting a live product demo and Q&A about AI search, query suggestions, and v1.9. [Register now](https://us06web.zoom.us/meeting/register/tZMudO-qpz8qGdDQ_MNn3NmGV0m8WLf1uECM?utm_campaign=ossutm_source=github&utm_medium=meilisearch#/registration)!
-
----
-
 [Meilisearch](https://www.meilisearch.com) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.

From 534f696b2965c4d601dfcb7c764c2451e852c8c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 20 Jun 2024 09:53:06 +0200 Subject: [PATCH 19/41] Update the README to link more demos (#4711) This Pull Request adds two new interesting demos to a brand-new list that replaces the short _Try it_ text just below the Where2Watch showcase image, in the hope that people will notice them. --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 540a2c92b..e65d2f9b0 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,10 @@

-🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥 +## 🖥 Demos +- [**Where To Watch:**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) Find the movie you will watch tonight through an up-to-date IMDb dataset, [featuring Hybrid Search](https://meilisearch.notion.site/Where2Watch-Noticeable-Queries-881f6270abef4452a94255462df740d7). +- [**Millions of Musics:**](https://music.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) Search throught a 47 millions musics and artists dataset. +- [**Tenant Tokens:**](https://tenant-token.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) Experience the search with restricted results that leverage the use of tenant tokens. ## ✨ Features - **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search) & full-text search to get the most relevant results From fb683fe88ba5e044b5681767fee0495ff125fe0f Mon Sep 17 00:00:00 2001 From: karribalu Date: Thu, 20 Jun 2024 23:55:09 +0100 Subject: [PATCH 20/41] Fix bad http status and error message on wrong payload --- meilisearch/src/error.rs | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 5a0b04020..84feea995 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -98,14 +98,27 @@ impl From for aweb::Error { impl From for MeilisearchHttpError { fn from(error: aweb::error::PayloadError) -> Self { - MeilisearchHttpError::Payload(PayloadError::Payload(error)) + match error { + aweb::error::PayloadError::Incomplete(_) => { + MeilisearchHttpError::Payload(PayloadError::Payload(ActixPayloadError::IncompleteError)) + } + _ => MeilisearchHttpError::Payload(PayloadError::Payload(ActixPayloadError::OtherError(error))) + } } } +#[derive(Debug, thiserror::Error)] +pub enum ActixPayloadError { + #[error("The provided payload is incomplete and cannot be decompressed")] + IncompleteError, + #[error(transparent)] + OtherError(aweb::error::PayloadError) +} + #[derive(Debug, thiserror::Error)] pub enum PayloadError { #[error(transparent)] - Payload(aweb::error::PayloadError), + Payload(ActixPayloadError), #[error(transparent)] Json(JsonPayloadError), #[error(transparent)] @@ -122,13 +135,15 @@ impl ErrorCode for PayloadError { fn error_code(&self) -> Code { match self { PayloadError::Payload(e) => match e { - aweb::error::PayloadError::Incomplete(_) => Code::Internal, - aweb::error::PayloadError::EncodingCorrupted => Code::Internal, - aweb::error::PayloadError::Overflow => Code::PayloadTooLarge, - aweb::error::PayloadError::UnknownLength => Code::Internal, - aweb::error::PayloadError::Http2Payload(_) => Code::Internal, - aweb::error::PayloadError::Io(_) => Code::Internal, - _ => todo!(), + ActixPayloadError::IncompleteError => Code::BadRequest, + ActixPayloadError::OtherError(error) => match error { + aweb::error::PayloadError::EncodingCorrupted => Code::Internal, + aweb::error::PayloadError::Overflow => Code::PayloadTooLarge, + aweb::error::PayloadError::UnknownLength => Code::Internal, + aweb::error::PayloadError::Http2Payload(_) => Code::Internal, + aweb::error::PayloadError::Io(_) => Code::Internal, + _ => todo!(), + } }, PayloadError::Json(err) => match err { JsonPayloadError::Overflow { .. 
} => Code::PayloadTooLarge, From 2a38f5c7571f49d33e315ad59ed7f28e9a203911 Mon Sep 17 00:00:00 2001 From: karribalu Date: Fri, 21 Jun 2024 00:14:26 +0100 Subject: [PATCH 21/41] Run Rustfmt --- meilisearch/src/error.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 84feea995..3a38b36d1 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -99,10 +99,12 @@ impl From for aweb::Error { impl From for MeilisearchHttpError { fn from(error: aweb::error::PayloadError) -> Self { match error { - aweb::error::PayloadError::Incomplete(_) => { - MeilisearchHttpError::Payload(PayloadError::Payload(ActixPayloadError::IncompleteError)) - } - _ => MeilisearchHttpError::Payload(PayloadError::Payload(ActixPayloadError::OtherError(error))) + aweb::error::PayloadError::Incomplete(_) => MeilisearchHttpError::Payload( + PayloadError::Payload(ActixPayloadError::IncompleteError), + ), + _ => MeilisearchHttpError::Payload(PayloadError::Payload( + ActixPayloadError::OtherError(error), + )), } } } @@ -112,7 +114,7 @@ pub enum ActixPayloadError { #[error("The provided payload is incomplete and cannot be decompressed")] IncompleteError, #[error(transparent)] - OtherError(aweb::error::PayloadError) + OtherError(aweb::error::PayloadError), } #[derive(Debug, thiserror::Error)] @@ -143,7 +145,7 @@ impl ErrorCode for PayloadError { aweb::error::PayloadError::Http2Payload(_) => Code::Internal, aweb::error::PayloadError::Io(_) => Code::Internal, _ => todo!(), - } + }, }, PayloadError::Json(err) => match err { JsonPayloadError::Overflow { .. } => Code::PayloadTooLarge, From cade18bd4753321d08ae69aa514c26ad2c1b3136 Mon Sep 17 00:00:00 2001 From: Strift Date: Mon, 24 Jun 2024 15:47:10 +0200 Subject: [PATCH 22/41] Update README.md (#4721) --- README.md | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index e65d2f9b0..d806ed963 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@

⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍

-[Meilisearch](https://www.meilisearch.com) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow. +[Meilisearch](https://www.meilisearch.com?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=intro) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.

@@ -36,14 +36,18 @@

-## 🖥 Demos -- [**Where To Watch:**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) Find the movie you will watch tonight through an up-to-date IMDb dataset, [featuring Hybrid Search](https://meilisearch.notion.site/Where2Watch-Noticeable-Queries-881f6270abef4452a94255462df740d7). -- [**Millions of Musics:**](https://music.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) Search throught a 47 millions musics and artists dataset. -- [**Tenant Tokens:**](https://tenant-token.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) Experience the search with restricted results that leverage the use of tenant tokens. +## 🖥 Examples + +- [**Movies**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — An application to help you find streaming platforms to watch movies using [hybrid search](https://www.meilisearch.com/solutions/hybrid-search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos). +- [**Ecommerce**](https://ecommerce.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Ecommerce website using disjunctive [facets](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos), range and rating filtering, and pagination. +- [**Songs**](https://music.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search through 47 million songs. +- [**SaaS**](https://saas.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search for contacts, deals, and companies in this [multi-tenant](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) CRM application. + +See the list of all our example apps in our [demos repository](https://github.com/meilisearch/demos).
## ✨ Features -- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search) & full-text search to get the most relevant results -- **Search-as-you-type:** find & display results in less than 50 milliseconds to provide an intuitive experience +- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results +- **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience - **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need @@ -62,7 +66,7 @@ You can consult Meilisearch's documentation at [meilisearch.com/docs](https://ww ## 🚀 Getting started -For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide. +For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [documentation](https://www.meilisearch.com/docs?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide. ## 🌍 Supercharge your Meilisearch experience @@ -86,7 +90,7 @@ Finally, for more in-depth information, refer to our articles explaining fundame ## 📊 Telemetry -Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want. +Meilisearch collects **anonymized** user data to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want. To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Remember to include your `Instance UID` in the message, as this helps us quickly find and delete your data. @@ -108,11 +112,11 @@ Thank you for your support! ## 👩‍💻 Contributing -Meilisearch is, and will always be, open-source! If you want to contribute to the project, please take a look at [our contribution guidelines](CONTRIBUTING.md). 
+Meilisearch is, and will always be, open-source! If you want to contribute to the project, please look at [our contribution guidelines](CONTRIBUTING.md). ## 📦 Versioning -Meilisearch releases and their associated binaries are available [in this GitHub page](https://github.com/meilisearch/meilisearch/releases). +Meilisearch releases and their associated binaries are available on the project's [releases page](https://github.com/meilisearch/meilisearch/releases). The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](https://github.com/meilisearch/engine-team/blob/main/resources/versioning-policy.md). From 558b66e5352805b707f700258efe731c5c394095 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 24 Jun 2024 19:00:44 +0200 Subject: [PATCH 23/41] makes most tests works with variable error messages --- meilisearch/tests/auth/authorization.rs | 2 +- meilisearch/tests/auth/tenant_token.rs | 19 +++++++++----- .../tests/auth/tenant_token_multi_search.rs | 25 +++++++++++++------ meilisearch/tests/common/mod.rs | 6 +++++ 4 files changed, 38 insertions(+), 14 deletions(-) diff --git a/meilisearch/tests/auth/authorization.rs b/meilisearch/tests/auth/authorization.rs index d26bb26b8..a57c9e11d 100644 --- a/meilisearch/tests/auth/authorization.rs +++ b/meilisearch/tests/auth/authorization.rs @@ -280,7 +280,7 @@ async fn access_authorized_no_index_restriction() { route, action ); - assert_ne!(code, 403); + assert_ne!(code, 403, "on route: {:?} - {:?} with action: {:?}", method, route, action); } } } diff --git a/meilisearch/tests/auth/tenant_token.rs b/meilisearch/tests/auth/tenant_token.rs index ba3b0b234..5e8a75c36 100644 --- a/meilisearch/tests/auth/tenant_token.rs +++ b/meilisearch/tests/auth/tenant_token.rs @@ -53,7 +53,8 @@ static DOCUMENTS: Lazy = Lazy::new(|| { }); static INVALID_RESPONSE: Lazy = Lazy::new(|| { - json!({"message": "The provided API key is invalid.", + json!({ + "message": null, "code": "invalid_api_key", "type": "auth", "link": "https://docs.meilisearch.com/errors#invalid_api_key" @@ -191,7 +192,9 @@ macro_rules! compute_forbidden_search { server.use_api_key(&web_token); let index = server.index("sales"); index - .search(json!({}), |response, code| { + .search(json!({}), |mut response, code| { + // We don't assert anything on the message since it may change between cases + response["message"] = serde_json::json!(null); assert_eq!( response, INVALID_RESPONSE.clone(), @@ -495,7 +498,8 @@ async fn error_access_forbidden_routes() { for ((method, route), actions) in AUTHORIZATIONS.iter() { if !actions.contains("search") { - let (response, code) = server.dummy_request(method, route).await; + let (mut response, code) = server.dummy_request(method, route).await; + response["message"] = serde_json::json!(null); assert_eq!(response, INVALID_RESPONSE.clone()); assert_eq!(code, 403); } @@ -529,14 +533,16 @@ async fn error_access_expired_parent_key() { server.use_api_key(&web_token); // test search request while parent_key is not expired - let (response, code) = server.dummy_request("POST", "/indexes/products/search").await; + let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await; + response["message"] = serde_json::json!(null); assert_ne!(response, INVALID_RESPONSE.clone()); assert_ne!(code, 403); // wait until the key is expired. 
thread::sleep(time::Duration::new(1, 0)); - let (response, code) = server.dummy_request("POST", "/indexes/products/search").await; + let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await; + response["message"] = serde_json::json!(null); assert_eq!(response, INVALID_RESPONSE.clone()); assert_eq!(code, 403); } @@ -585,7 +591,8 @@ async fn error_access_modified_token() { .join("."); server.use_api_key(&altered_token); - let (response, code) = server.dummy_request("POST", "/indexes/products/search").await; + let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await; + response["message"] = serde_json::json!(null); assert_eq!(response, INVALID_RESPONSE.clone()); assert_eq!(code, 403); } diff --git a/meilisearch/tests/auth/tenant_token_multi_search.rs b/meilisearch/tests/auth/tenant_token_multi_search.rs index 09b5dbbcc..81146d14e 100644 --- a/meilisearch/tests/auth/tenant_token_multi_search.rs +++ b/meilisearch/tests/auth/tenant_token_multi_search.rs @@ -109,9 +109,11 @@ static NESTED_DOCUMENTS: Lazy = Lazy::new(|| { fn invalid_response(query_index: Option) -> Value { let message = if let Some(query_index) = query_index { - format!("Inside `.queries[{query_index}]`: The provided API key is invalid.") + json!(format!("Inside `.queries[{query_index}]`: The provided API key is invalid.")) } else { - "The provided API key is invalid.".to_string() + // if it's anything else we simply return null and will tests all the + // error messages somewhere else + json!(null) }; json!({"message": message, "code": "invalid_api_key", @@ -414,7 +416,10 @@ macro_rules! compute_forbidden_single_search { for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) { let web_token = generate_tenant_token(&uid, &key, tenant_token.clone()); server.use_api_key(&web_token); - let (response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await; + let (mut response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await; + if failed_query_index.is_none() && !response["message"].is_null() { + response["message"] = serde_json::json!(null); + } assert_eq!( response, invalid_response(failed_query_index), @@ -469,10 +474,13 @@ macro_rules! compute_forbidden_multiple_search { for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) { let web_token = generate_tenant_token(&uid, &key, tenant_token.clone()); server.use_api_key(&web_token); - let (response, code) = server.multi_search(json!({"queries" : [ + let (mut response, code) = server.multi_search(json!({"queries" : [ {"indexUid": "sales"}, {"indexUid": "products"}, ]})).await; + if failed_query_index.is_none() && !response["message"].is_null() { + response["message"] = serde_json::json!(null); + } assert_eq!( response, invalid_response(failed_query_index), @@ -1073,18 +1081,20 @@ async fn error_access_expired_parent_key() { server.use_api_key(&web_token); // test search request while parent_key is not expired - let (response, code) = server + let (mut response, code) = server .multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]})) .await; + response["message"] = serde_json::json!(null); assert_ne!(response, invalid_response(None)); assert_ne!(code, 403); // wait until the key is expired. 
thread::sleep(time::Duration::new(1, 0)); - let (response, code) = server + let (mut response, code) = server .multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]})) .await; + response["message"] = serde_json::json!(null); assert_eq!(response, invalid_response(None)); assert_eq!(code, 403); } @@ -1134,8 +1144,9 @@ async fn error_access_modified_token() { .join("."); server.use_api_key(&altered_token); - let (response, code) = + let (mut response, code) = server.multi_search(json!({"queries" : [{"indexUid": "products"}]})).await; + response["message"] = serde_json::json!(null); assert_eq!(response, invalid_response(None)); assert_eq!(code, 403); } diff --git a/meilisearch/tests/common/mod.rs b/meilisearch/tests/common/mod.rs index 3117dd185..1391cf7cf 100644 --- a/meilisearch/tests/common/mod.rs +++ b/meilisearch/tests/common/mod.rs @@ -42,6 +42,12 @@ impl std::ops::Deref for Value { } } +impl std::ops::DerefMut for Value { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + impl PartialEq for Value { fn eq(&self, other: &serde_json::Value) -> bool { &self.0 == other From a74fb87d1e3600b55c5e5db90b7fe857ef252ac1 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 24 Jun 2024 19:00:53 +0200 Subject: [PATCH 24/41] start introducing new error messages --- .../src/extractors/authentication/mod.rs | 109 ++++++++------ meilisearch/tests/auth/errors.rs | 134 +++++++++++++++++- 2 files changed, 200 insertions(+), 43 deletions(-) diff --git a/meilisearch/src/extractors/authentication/mod.rs b/meilisearch/src/extractors/authentication/mod.rs index 007e2be40..c3c38c27f 100644 --- a/meilisearch/src/extractors/authentication/mod.rs +++ b/meilisearch/src/extractors/authentication/mod.rs @@ -12,6 +12,8 @@ use futures::Future; use meilisearch_auth::{AuthController, AuthFilter}; use meilisearch_types::error::{Code, ResponseError}; +use self::policies::AuthError; + pub struct GuardedData { data: D, filters: AuthFilter, @@ -35,12 +37,12 @@ impl GuardedData { let missing_master_key = auth.get_master_key().is_none(); match Self::authenticate(auth, token, index).await? { - Some(filters) => match data { + Ok(filters) => match data { Some(data) => Ok(Self { data, filters, _marker: PhantomData }), None => Err(AuthenticationError::IrretrievableState.into()), }, - None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()), - None => Err(AuthenticationError::InvalidToken.into()), + Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()), + Err(e) => Err(ResponseError::from_msg(e.to_string(), Code::InvalidApiKey)), } } @@ -51,12 +53,12 @@ impl GuardedData { let missing_master_key = auth.get_master_key().is_none(); match Self::authenticate(auth, String::new(), None).await? 
{ - Some(filters) => match data { + Ok(filters) => match data { Some(data) => Ok(Self { data, filters, _marker: PhantomData }), None => Err(AuthenticationError::IrretrievableState.into()), }, - None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()), - None => Err(AuthenticationError::MissingAuthorizationHeader.into()), + Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()), + Err(_) => Err(AuthenticationError::MissingAuthorizationHeader.into()), } } @@ -64,7 +66,7 @@ impl GuardedData { auth: Data, token: String, index: Option, - ) -> Result, ResponseError> + ) -> Result, ResponseError> where P: Policy + 'static, { @@ -127,7 +129,7 @@ pub trait Policy { auth: Data, token: &str, index: Option<&str>, - ) -> Option; + ) -> Result; } pub mod policies { @@ -144,11 +146,32 @@ pub mod policies { enum TenantTokenOutcome { NotATenantToken, - Invalid, - Expired, Valid(Uuid, SearchRules), } + #[derive(thiserror::Error, Debug)] + pub enum AuthError { + #[error("Tenant token expired. Was valid up to `{exp}` and we're now `{now}`")] + ExpiredTenantToken { exp: i64, now: i64 }, + #[error("The provided API key is invalid.")] + InvalidApiKey, + #[error("The provided tenant token is invalid.")] + InvalidTenantToken, + #[error("Could not decode tenant token, {0}")] + CouldNotDecodeTenantToken(jsonwebtoken::errors::Error), + } + + impl From for AuthError { + fn from(error: jsonwebtoken::errors::Error) -> Self { + use jsonwebtoken::errors::ErrorKind; + + match error.kind() { + ErrorKind::InvalidToken => AuthError::InvalidTenantToken, + _ => AuthError::CouldNotDecodeTenantToken(error), + } + } + } + fn tenant_token_validation() -> Validation { let mut validation = Validation::default(); validation.validate_exp = false; @@ -158,15 +181,15 @@ pub mod policies { } /// Extracts the key id used to sign the payload, without performing any validation. - fn extract_key_id(token: &str) -> Option { + fn extract_key_id(token: &str) -> Result { let mut validation = tenant_token_validation(); validation.insecure_disable_signature_validation(); let dummy_key = DecodingKey::from_secret(b"secret"); - let token_data = decode::(token, &dummy_key, &validation).ok()?; + let token_data = decode::(token, &dummy_key, &validation)?; // get token fields without validating it. let Claims { api_key_uid, .. } = token_data.claims; - Some(api_key_uid) + Ok(api_key_uid) } fn is_keys_action(action: u8) -> bool { @@ -187,76 +210,78 @@ pub mod policies { auth: Data, token: &str, index: Option<&str>, - ) -> Option { + ) -> Result { // authenticate if token is the master key. // Without a master key, all routes are accessible except the key-related routes. if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) { - return Some(AuthFilter::default()); + return Ok(AuthFilter::default()); } let (key_uuid, search_rules) = match ActionPolicy::::authenticate_tenant_token(&auth, token) { - TenantTokenOutcome::Valid(key_uuid, search_rules) => { + Ok(TenantTokenOutcome::Valid(key_uuid, search_rules)) => { (key_uuid, Some(search_rules)) } - TenantTokenOutcome::Expired => return None, - TenantTokenOutcome::Invalid => return None, - TenantTokenOutcome::NotATenantToken => { - (auth.get_optional_uid_from_encoded_key(token.as_bytes()).ok()??, None) - } + Ok(TenantTokenOutcome::NotATenantToken) + | Err(AuthError::InvalidTenantToken) => ( + auth.get_optional_uid_from_encoded_key(token.as_bytes()) + .map_err(|_e| AuthError::InvalidApiKey)? 
+ .ok_or(AuthError::InvalidApiKey)?, + None, + ), + Err(e) => return Err(e), }; // check that the indexes are allowed - let action = Action::from_repr(A)?; - let auth_filter = auth.get_key_filters(key_uuid, search_rules).ok()?; + let action = Action::from_repr(A).ok_or(AuthError::InvalidTenantToken)?; + let auth_filter = auth + .get_key_filters(key_uuid, search_rules) + .map_err(|_e| AuthError::InvalidTenantToken)?; if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false) && index.map(|index| auth_filter.is_index_authorized(index)).unwrap_or(true) { - return Some(auth_filter); + return Ok(auth_filter); } - None + Err(AuthError::InvalidApiKey) } } impl ActionPolicy { - fn authenticate_tenant_token(auth: &AuthController, token: &str) -> TenantTokenOutcome { + fn authenticate_tenant_token( + auth: &AuthController, + token: &str, + ) -> Result { // Only search action can be accessed by a tenant token. if A != actions::SEARCH { - return TenantTokenOutcome::NotATenantToken; + return Ok(TenantTokenOutcome::NotATenantToken); } - let uid = if let Some(uid) = extract_key_id(token) { - uid - } else { - return TenantTokenOutcome::NotATenantToken; - }; + let uid = extract_key_id(token)?; // Check if tenant token is valid. let key = if let Some(key) = auth.generate_key(uid) { key } else { - return TenantTokenOutcome::Invalid; + /// Only happens when no master key has been set + return Err(AuthError::InvalidTenantToken); }; - let data = if let Ok(data) = decode::( + let data = decode::( token, &DecodingKey::from_secret(key.as_bytes()), &tenant_token_validation(), - ) { - data - } else { - return TenantTokenOutcome::Invalid; - }; + )?; // Check if token is expired. if let Some(exp) = data.claims.exp { - if OffsetDateTime::now_utc().unix_timestamp() > exp { - return TenantTokenOutcome::Expired; + let now = OffsetDateTime::now_utc().unix_timestamp(); + if now > exp { + return Err(AuthError::ExpiredTenantToken { exp, now }); } } - TenantTokenOutcome::Valid(uid, data.claims.search_rules) + Ok(TenantTokenOutcome::Valid(uid, data.claims.search_rules)) } } diff --git a/meilisearch/tests/auth/errors.rs b/meilisearch/tests/auth/errors.rs index 581243a0a..4c65fce98 100644 --- a/meilisearch/tests/auth/errors.rs +++ b/meilisearch/tests/auth/errors.rs @@ -1,7 +1,10 @@ +use actix_web::test; +use http::StatusCode; +use jsonwebtoken::{EncodingKey, Header}; use meili_snap::*; use uuid::Uuid; -use crate::common::Server; +use crate::common::{Server, Value}; use crate::json; #[actix_rt::test] @@ -436,3 +439,132 @@ async fn patch_api_keys_unknown_field() { } "###); } + +async fn send_request_with_custom_auth( + app: impl actix_web::dev::Service< + actix_http::Request, + Response = actix_web::dev::ServiceResponse, + Error = actix_web::Error, + >, + url: &str, + auth: &str, +) -> (Value, StatusCode) { + let req = test::TestRequest::get().uri(url).insert_header(("Authorization", auth)).to_request(); + let res = test::call_service(&app, req).await; + let status_code = res.status(); + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + + (response, status_code) +} + +#[actix_rt::test] +async fn invalid_auth_format() { + let server = Server::new_auth().await; + let app = server.init_web_app().await; + + let req = test::TestRequest::get().uri("/indexes/dog/documents").to_request(); + let res = test::call_service(&app, req).await; + let status_code = res.status(); + let body = test::read_body(res).await; + let response: Value = 
serde_json::from_slice(&body).unwrap_or_default(); + snapshot!(status_code, @"401 Unauthorized"); + snapshot!(response, @r###" + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + "###); + + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/dog/documents", "Bearer").await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "The provided API key is invalid.", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); + + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/dog/documents", "Bearer kefir").await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "The provided API key is invalid.", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); + + // The tenant token won't be recognized at all if we're not on a search route + let claims = json!({ "tamo": "kefir" }); + let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo")) + .unwrap(); + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/dog/documents", &format!("Bearer {jwt}")) + .await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "The provided API key is invalid.", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); + + let claims = json!({ "tamo": "kefir" }); + let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo")) + .unwrap(); + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "Could not decode tenant token, JSON error: missing field `searchRules` at line 1 column 16", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); + + // The error messages are not ideal but that's expected since we cannot _yet_ use deserr + let claims = json!({ "searchRules": "kefir" }); + let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo")) + .unwrap(); + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "Could not decode tenant token, JSON error: data did not match any variant of untagged enum SearchRules at line 1 column 23", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); + + let uuid = Uuid::nil(); + let claims = json!({ "searchRules": ["kefir"], "apiKeyUid": uuid.to_string() }); + let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo")) + .unwrap(); + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "Could not decode tenant token, InvalidSignature", + "code": 
"invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); + + // ~~ For the next tests we first need to retrieve an API key +} From 2e0ff56f3fcdb378c41bee1c4c89ba3d5f585bc4 Mon Sep 17 00:00:00 2001 From: Junho Choi Date: Thu, 30 May 2024 17:28:11 +0900 Subject: [PATCH 25/41] Add missing Korean support Some configuration is missing `korean` features and add a test case in `milli/src/search/mod.rs`. --- meilisearch-types/Cargo.toml | 2 ++ meilisearch/Cargo.toml | 1 + milli/src/search/mod.rs | 24 ++++++++++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 6d23f144a..a15fc01f8 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -49,6 +49,8 @@ chinese-pinyin = ["milli/chinese-pinyin"] hebrew = ["milli/hebrew"] # japanese specialized tokenization japanese = ["milli/japanese"] +# korean specialized tokenization +korean = ["milli/korean"] # thai specialized tokenization thai = ["milli/thai"] # allow greek specialized tokenization diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 75962c450..eacd3dd8e 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -151,6 +151,7 @@ chinese = ["meilisearch-types/chinese"] chinese-pinyin = ["meilisearch-types/chinese-pinyin"] hebrew = ["meilisearch-types/hebrew"] japanese = ["meilisearch-types/japanese"] +korean = ["meilisearch-types/korean"] thai = ["meilisearch-types/thai"] greek = ["meilisearch-types/greek"] khmer = ["meilisearch-types/khmer"] diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 19d5ff358..443e4b9c1 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -336,4 +336,28 @@ mod test { assert_eq!(documents_ids, vec![1]); } + + #[cfg(feature = "korean")] + #[test] + fn test_hangul_language_detection() { + use crate::index::tests::TempIndex; + + let index = TempIndex::new(); + + index + .add_documents(documents!([ + { "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" }, + { "id": 1, "title": "김밥먹을래。" }, + { "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" } + ])) + .unwrap(); + + let txn = index.write_txn().unwrap(); + let mut search = Search::new(&txn, &index); + + search.query("김밥"); + let SearchResult { documents_ids, .. } = search.execute().unwrap(); + + assert_eq!(documents_ids, vec![1]); + } } From 27496354e27bf7f1fb76aa714ec3c9dc6142afa1 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 5 Jun 2024 09:22:58 +0200 Subject: [PATCH 26/41] Grow by 1TB instead of 1MB --- index-scheduler/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 8a1c2f540..5557764e9 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1801,7 +1801,7 @@ mod tests { task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. 
enable_mdb_writemap: false, - index_growth_amount: 1000 * 1000, // 1 MB + index_growth_amount: 1000 * 1000 * 1000 * 1000, // 1 TB index_count: 5, indexer_config, autobatching_enabled: true, From d75e0098c794dfb657a2db2e03a96cd527578b24 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 13 Jun 2024 17:47:44 +0200 Subject: [PATCH 27/41] Fixes for Rust v1.79 --- dump/src/reader/v3/settings.rs | 1 + dump/src/reader/v4/settings.rs | 1 + dump/src/reader/v5/tasks.rs | 1 + milli/Cargo.toml | 4 ++-- milli/src/search/new/logger/visual.rs | 11 ++++------- milli/src/update/index_documents/transform.rs | 4 +--- xtask/Cargo.toml | 2 +- 7 files changed, 11 insertions(+), 13 deletions(-) diff --git a/dump/src/reader/v3/settings.rs b/dump/src/reader/v3/settings.rs index 0027bf4ff..3288bb1e7 100644 --- a/dump/src/reader/v3/settings.rs +++ b/dump/src/reader/v3/settings.rs @@ -152,6 +152,7 @@ impl Settings { } #[derive(Debug, Clone, Deserialize)] +#[allow(dead_code)] // otherwise rustc complains that the fields go unused #[cfg_attr(test, derive(serde::Serialize))] #[serde(deny_unknown_fields)] #[serde(rename_all = "camelCase")] diff --git a/dump/src/reader/v4/settings.rs b/dump/src/reader/v4/settings.rs index 964cd1152..78d9118ff 100644 --- a/dump/src/reader/v4/settings.rs +++ b/dump/src/reader/v4/settings.rs @@ -182,6 +182,7 @@ impl Settings { } } +#[allow(dead_code)] // otherwise rustc complains that the fields go unused #[derive(Debug, Clone, Deserialize)] #[cfg_attr(test, derive(serde::Serialize))] #[serde(deny_unknown_fields)] diff --git a/dump/src/reader/v5/tasks.rs b/dump/src/reader/v5/tasks.rs index 528a870fc..8dfb2d0b0 100644 --- a/dump/src/reader/v5/tasks.rs +++ b/dump/src/reader/v5/tasks.rs @@ -200,6 +200,7 @@ impl std::ops::Deref for IndexUid { } } +#[allow(dead_code)] // otherwise rustc complains that the fields go unused #[derive(Debug)] #[cfg_attr(test, derive(serde::Serialize))] #[cfg_attr(test, serde(rename_all = "camelCase"))] diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 4a08e6261..27ce4a53e 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -71,10 +71,10 @@ csv = "1.3.0" candle-core = { version = "0.4.1" } candle-transformers = { version = "0.4.1" } candle-nn = { version = "0.4.1" } -tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default_features = false, features = [ +tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [ "onig", ] } -hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [ +hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [ "online", ] } tiktoken-rs = "0.5.8" diff --git a/milli/src/search/new/logger/visual.rs b/milli/src/search/new/logger/visual.rs index 8df56da89..2bffdd8d9 100644 --- a/milli/src/search/new/logger/visual.rs +++ b/milli/src/search/new/logger/visual.rs @@ -22,7 +22,7 @@ pub enum SearchEvents { RankingRuleStartIteration { ranking_rule_idx: usize, universe_len: u64 }, RankingRuleNextBucket { ranking_rule_idx: usize, universe_len: u64, bucket_len: u64 }, RankingRuleSkipBucket { ranking_rule_idx: usize, bucket_len: u64 }, - RankingRuleEndIteration { ranking_rule_idx: usize, universe_len: u64 }, + RankingRuleEndIteration { ranking_rule_idx: usize }, ExtendResults { new: Vec }, ProximityGraph { graph: RankingRuleGraph }, ProximityPaths { paths: Vec>> }, @@ -123,12 
+123,9 @@ impl SearchLogger for VisualSearchLogger { &mut self, ranking_rule_idx: usize, _ranking_rule: &dyn RankingRule, - universe: &RoaringBitmap, + _universe: &RoaringBitmap, ) { - self.events.push(SearchEvents::RankingRuleEndIteration { - ranking_rule_idx, - universe_len: universe.len(), - }); + self.events.push(SearchEvents::RankingRuleEndIteration { ranking_rule_idx }); self.location.pop(); } fn add_to_results(&mut self, docids: &[u32]) { @@ -326,7 +323,7 @@ impl<'ctx> DetailedLoggerFinish<'ctx> { assert!(ranking_rule_idx == self.rr_action_counter.len() - 1); self.write_skip_bucket(bucket_len)?; } - SearchEvents::RankingRuleEndIteration { ranking_rule_idx, universe_len: _ } => { + SearchEvents::RankingRuleEndIteration { ranking_rule_idx } => { assert!(ranking_rule_idx == self.rr_action_counter.len() - 1); self.write_end_iteration()?; } diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index c34b7876a..876663c92 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -51,7 +51,6 @@ pub struct Transform<'a, 'i> { fields_ids_map: FieldsIdsMap, indexer_settings: &'a IndexerConfig, - pub autogenerate_docids: bool, pub index_documents_method: IndexDocumentsMethod, available_documents_ids: AvailableDocumentsIds, @@ -105,7 +104,7 @@ impl<'a, 'i> Transform<'a, 'i> { index: &'i Index, indexer_settings: &'a IndexerConfig, index_documents_method: IndexDocumentsMethod, - autogenerate_docids: bool, + _autogenerate_docids: bool, ) -> Result { // We must choose the appropriate merge function for when two or more documents // with the same user id must be merged or fully replaced in the same batch. @@ -139,7 +138,6 @@ impl<'a, 'i> Transform<'a, 'i> { index, fields_ids_map: index.fields_ids_map(wtxn)?, indexer_settings, - autogenerate_docids, available_documents_ids: AvailableDocumentsIds::from_documents_ids(&documents_ids), original_sorter, flattened_sorter, diff --git a/xtask/Cargo.toml b/xtask/Cargo.toml index 562dfddb3..a618b06a5 100644 --- a/xtask/Cargo.toml +++ b/xtask/Cargo.toml @@ -21,7 +21,7 @@ reqwest = { version = "0.11.23", features = [ "stream", "json", "rustls-tls", -], default_features = false } +], default-features = false } serde = { version = "1.0.195", features = ["derive"] } serde_json = "1.0.111" sha2 = "0.10.8" From 7da21bb6011df0b9a021f8f6a17d7c5184173a42 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 25 Jun 2024 12:40:51 +0200 Subject: [PATCH 28/41] introduce as many custom error message as possible --- meilisearch-auth/src/lib.rs | 45 ++++++ .../src/extractors/authentication/mod.rs | 61 +++++++- meilisearch/tests/auth/authorization.rs | 11 +- meilisearch/tests/auth/errors.rs | 140 +++++++++++++++++- 4 files changed, 240 insertions(+), 17 deletions(-) diff --git a/meilisearch-auth/src/lib.rs b/meilisearch-auth/src/lib.rs index e74f1707c..4dbf1bf6f 100644 --- a/meilisearch-auth/src/lib.rs +++ b/meilisearch-auth/src/lib.rs @@ -188,6 +188,12 @@ impl AuthFilter { self.allow_index_creation && self.is_index_authorized(index) } + #[inline] + /// Return true if a tenant token was used to generate the search rules. + pub fn is_tenant_token(&self) -> bool { + self.search_rules.is_some() + } + pub fn with_allowed_indexes(allowed_indexes: HashSet) -> Self { Self { search_rules: None, @@ -205,6 +211,7 @@ impl AuthFilter { .unwrap_or(true) } + /// Check if the index is authorized by the API key and the tenant token. 
pub fn is_index_authorized(&self, index: &str) -> bool { self.key_authorized_indexes.is_index_authorized(index) && self @@ -214,6 +221,44 @@ impl AuthFilter { .unwrap_or(true) } + /// Only check if the index is authorized by the API key + pub fn api_key_is_index_authorized(&self, index: &str) -> bool { + self.key_authorized_indexes.is_index_authorized(index) + } + + /// Only check if the index is authorized by the tenant token + pub fn tenant_token_is_index_authorized(&self, index: &str) -> bool { + self.search_rules + .as_ref() + .map(|search_rules| search_rules.is_index_authorized(index)) + .unwrap_or(true) + } + + /// Return the list of authorized indexes by the tenant token if any + pub fn tenant_token_list_index_authorized(&self) -> Vec { + match self.search_rules { + Some(ref search_rules) => { + let mut indexes: Vec<_> = match search_rules { + SearchRules::Set(set) => set.iter().map(|s| s.to_string()).collect(), + SearchRules::Map(map) => map.keys().map(|s| s.to_string()).collect(), + }; + indexes.sort_unstable(); + indexes + } + None => Vec::new(), + } + } + + /// Return the list of authorized indexes by the api key if any + pub fn api_key_list_index_authorized(&self) -> Vec { + let mut indexes: Vec<_> = match self.key_authorized_indexes { + SearchRules::Set(ref set) => set.iter().map(|s| s.to_string()).collect(), + SearchRules::Map(ref map) => map.keys().map(|s| s.to_string()).collect(), + }; + indexes.sort_unstable(); + indexes + } + pub fn get_index_search_rules(&self, index: &str) -> Option { if !self.is_index_authorized(index) { return None; diff --git a/meilisearch/src/extractors/authentication/mod.rs b/meilisearch/src/extractors/authentication/mod.rs index c3c38c27f..ecd1cadf8 100644 --- a/meilisearch/src/extractors/authentication/mod.rs +++ b/meilisearch/src/extractors/authentication/mod.rs @@ -136,6 +136,7 @@ pub mod policies { use actix_web::web::Data; use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation}; use meilisearch_auth::{AuthController, AuthFilter, SearchRules}; + use meilisearch_types::error::{Code, ErrorCode}; // reexport actions in policies in order to be used in routes configuration. pub use meilisearch_types::keys::{actions, Action}; use serde::{Deserialize, Serialize}; @@ -151,14 +152,26 @@ pub mod policies { #[derive(thiserror::Error, Debug)] pub enum AuthError { - #[error("Tenant token expired. Was valid up to `{exp}` and we're now `{now}`")] + #[error("Tenant token expired. Was valid up to `{exp}` and we're now `{now}`.")] ExpiredTenantToken { exp: i64, now: i64 }, #[error("The provided API key is invalid.")] InvalidApiKey, + #[error("The provided tenant token cannot acces the index `{index}`, allowed indexes are {allowed:?}.")] + TenantTokenAccessingnUnauthorizedIndex { index: String, allowed: Vec }, + #[error( + "The API key used to generate this tenant token cannot acces the index `{index}`." + )] + TenantTokenApiKeyAccessingnUnauthorizedIndex { index: String }, + #[error( + "The API key cannot acces the index `{index}`, authorized indexes are {allowed:?}." 
+ )] + ApiKeyAccessingnUnauthorizedIndex { index: String, allowed: Vec }, #[error("The provided tenant token is invalid.")] InvalidTenantToken, - #[error("Could not decode tenant token, {0}")] + #[error("Could not decode tenant token, {0}.")] CouldNotDecodeTenantToken(jsonwebtoken::errors::Error), + #[error("Invalid action `{0}`.")] + InternalInvalidAction(u8), } impl From for AuthError { @@ -172,6 +185,15 @@ pub mod policies { } } + impl ErrorCode for AuthError { + fn error_code(&self) -> Code { + match self { + AuthError::InternalInvalidAction(_) => Code::Internal, + _ => Code::InvalidApiKey, + } + } + } + fn tenant_token_validation() -> Validation { let mut validation = Validation::default(); validation.validate_exp = false; @@ -233,13 +255,37 @@ pub mod policies { }; // check that the indexes are allowed - let action = Action::from_repr(A).ok_or(AuthError::InvalidTenantToken)?; + let action = Action::from_repr(A).ok_or(AuthError::InternalInvalidAction(A))?; let auth_filter = auth .get_key_filters(key_uuid, search_rules) - .map_err(|_e| AuthError::InvalidTenantToken)?; - if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false) - && index.map(|index| auth_filter.is_index_authorized(index)).unwrap_or(true) - { + .map_err(|_e| AuthError::InvalidApiKey)?; + + // First check if the index is authorized in the tenant token, this is a public + // information, we can return a nice error message. + if let Some(index) = index { + if !auth_filter.tenant_token_is_index_authorized(index) { + return Err(AuthError::TenantTokenAccessingnUnauthorizedIndex { + index: index.to_string(), + allowed: auth_filter.tenant_token_list_index_authorized(), + }); + } + if !auth_filter.api_key_is_index_authorized(index) { + if auth_filter.is_tenant_token() { + // If the error comes from a tenant token we cannot share the list + // of authorized indexes in the API key. This is not public information. 
+ return Err(AuthError::TenantTokenApiKeyAccessingnUnauthorizedIndex { + index: index.to_string(), + }); + } else { + // Otherwise we can share the + return Err(AuthError::ApiKeyAccessingnUnauthorizedIndex { + index: index.to_string(), + allowed: auth_filter.api_key_list_index_authorized(), + }); + } + } + } + if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false) { return Ok(auth_filter); } @@ -263,7 +309,6 @@ pub mod policies { let key = if let Some(key) = auth.generate_key(uid) { key } else { - /// Only happens when no master key has been set return Err(AuthError::InvalidTenantToken); }; diff --git a/meilisearch/tests/auth/authorization.rs b/meilisearch/tests/auth/authorization.rs index a57c9e11d..609b7d01b 100644 --- a/meilisearch/tests/auth/authorization.rs +++ b/meilisearch/tests/auth/authorization.rs @@ -78,7 +78,7 @@ pub static ALL_ACTIONS: Lazy> = Lazy::new(|| { }); static INVALID_RESPONSE: Lazy = Lazy::new(|| { - json!({"message": "The provided API key is invalid.", + json!({"message": null, "code": "invalid_api_key", "type": "auth", "link": "https://docs.meilisearch.com/errors#invalid_api_key" @@ -119,7 +119,8 @@ async fn error_access_expired_key() { thread::sleep(time::Duration::new(1, 0)); for (method, route) in AUTHORIZATIONS.keys() { - let (response, code) = server.dummy_request(method, route).await; + let (mut response, code) = server.dummy_request(method, route).await; + response["message"] = serde_json::json!(null); assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route); assert_eq!(403, code, "{:?}", &response); @@ -149,7 +150,8 @@ async fn error_access_unauthorized_index() { // filter `products` index routes .filter(|(_, route)| route.starts_with("/indexes/products")) { - let (response, code) = server.dummy_request(method, route).await; + let (mut response, code) = server.dummy_request(method, route).await; + response["message"] = serde_json::json!(null); assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route); assert_eq!(403, code, "{:?}", &response); @@ -176,7 +178,8 @@ async fn error_access_unauthorized_action() { let key = response["key"].as_str().unwrap(); server.use_api_key(key); - let (response, code) = server.dummy_request(method, route).await; + let (mut response, code) = server.dummy_request(method, route).await; + response["message"] = serde_json::json!(null); assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route); assert_eq!(403, code, "{:?}", &response); diff --git a/meilisearch/tests/auth/errors.rs b/meilisearch/tests/auth/errors.rs index 4c65fce98..466eefe65 100644 --- a/meilisearch/tests/auth/errors.rs +++ b/meilisearch/tests/auth/errors.rs @@ -478,6 +478,21 @@ async fn invalid_auth_format() { } "###); + let req = test::TestRequest::get().uri("/indexes/dog/documents").to_request(); + let res = test::call_service(&app, req).await; + let status_code = res.status(); + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + snapshot!(status_code, @"401 Unauthorized"); + snapshot!(response, @r###" + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + "###); + let (response, status_code) = send_request_with_custom_auth(&app, "/indexes/dog/documents", "Bearer").await; snapshot!(status_code, @"403 Forbidden"); @@ -489,9 +504,15 @@ async fn invalid_auth_format() { "link": "https://docs.meilisearch.com/errors#invalid_api_key" } "###); +} + +#[actix_rt::test] +async fn invalid_api_key() { + let server = Server::new_auth().await; + let app = server.init_web_app().await; let (response, status_code) = - send_request_with_custom_auth(&app, "/indexes/dog/documents", "Bearer kefir").await; + send_request_with_custom_auth(&app, "/indexes/dog/search", "Bearer kefir").await; snapshot!(status_code, @"403 Forbidden"); snapshot!(response, @r###" { @@ -502,6 +523,54 @@ async fn invalid_auth_format() { } "###); + let uuid = Uuid::nil(); + let key = json!({ "actions": ["search"], "indexes": ["dog"], "expiresAt": null, "uid": uuid.to_string() }); + let req = test::TestRequest::post() + .uri("/keys") + .insert_header(("Authorization", "Bearer MASTER_KEY")) + .set_json(&key) + .to_request(); + let res = test::call_service(&app, req).await; + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), @r###" + { + "name": null, + "description": null, + "key": "aeb94973e0b6e912d94165430bbe87dee91a7c4f891ce19050c3910ec96977e9", + "uid": "00000000-0000-0000-0000-000000000000", + "actions": [ + "search" + ], + "indexes": [ + "dog" + ], + "expiresAt": null, + "createdAt": "[date]", + "updatedAt": "[date]" + } + "###); + let key = response["key"].as_str().unwrap(); + + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/doggo/search", &format!("Bearer {key}")) + .await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "The API key cannot acces the index `doggo`, authorized indexes are [\"dog\"].", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); +} + +#[actix_rt::test] +async fn invalid_tenant_token() { + let server = Server::new_auth().await; + let app = server.init_web_app().await; + // The tenant token won't be recognized at all if we're not on a search route let claims = json!({ "tamo": "kefir" }); let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo")) @@ -527,7 +596,7 @@ async fn invalid_auth_format() { snapshot!(status_code, @"403 Forbidden"); snapshot!(response, @r###" { - "message": "Could not decode tenant token, JSON error: missing field `searchRules` at line 1 column 16", + "message": "Could not decode tenant token, JSON error: missing field `searchRules` at line 1 column 16.", "code": "invalid_api_key", "type": "auth", "link": "https://docs.meilisearch.com/errors#invalid_api_key" @@ -543,7 +612,7 @@ async fn invalid_auth_format() { snapshot!(status_code, @"403 Forbidden"); snapshot!(response, @r###" { - "message": "Could not decode tenant token, JSON error: data did not match any variant of untagged enum SearchRules at line 1 column 23", + "message": "Could not decode tenant token, JSON error: data did not match any variant of untagged enum SearchRules at line 1 column 23.", "code": "invalid_api_key", "type": "auth", "link": 
"https://docs.meilisearch.com/errors#invalid_api_key" @@ -559,12 +628,73 @@ async fn invalid_auth_format() { snapshot!(status_code, @"403 Forbidden"); snapshot!(response, @r###" { - "message": "Could not decode tenant token, InvalidSignature", + "message": "Could not decode tenant token, InvalidSignature.", "code": "invalid_api_key", "type": "auth", "link": "https://docs.meilisearch.com/errors#invalid_api_key" } "###); - // ~~ For the next tests we first need to retrieve an API key + // ~~ For the next tests we first need a valid API key + let key = json!({ "actions": ["search"], "indexes": ["dog"], "expiresAt": null, "uid": uuid.to_string() }); + let req = test::TestRequest::post() + .uri("/keys") + .insert_header(("Authorization", "Bearer MASTER_KEY")) + .set_json(&key) + .to_request(); + let res = test::call_service(&app, req).await; + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), @r###" + { + "name": null, + "description": null, + "key": "aeb94973e0b6e912d94165430bbe87dee91a7c4f891ce19050c3910ec96977e9", + "uid": "00000000-0000-0000-0000-000000000000", + "actions": [ + "search" + ], + "indexes": [ + "dog" + ], + "expiresAt": null, + "createdAt": "[date]", + "updatedAt": "[date]" + } + "###); + let key = response["key"].as_str().unwrap(); + + let claims = json!({ "searchRules": ["doggo", "catto"], "apiKeyUid": uuid.to_string() }); + let jwt = jsonwebtoken::encode( + &Header::default(), + &claims, + &EncodingKey::from_secret(key.as_bytes()), + ) + .unwrap(); + // Try to access an index that is not authorized by the tenant token + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "The provided tenant token cannot acces the index `dog`, allowed indexes are [\"catto\", \"doggo\"].", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); + + // Try to access an index that *is* authorized by the tenant token but not by the api key used to generate the tt + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/doggo/search", &format!("Bearer {jwt}")) + .await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "The API key used to generate this tenant token cannot acces the index `doggo`.", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); } From dcdc83946fa2939ed6c446b4e434edc35f923678 Mon Sep 17 00:00:00 2001 From: JWSong Date: Tue, 25 Jun 2024 21:41:47 +0900 Subject: [PATCH 29/41] accept large number as string --- meilisearch/tests/documents/add_documents.rs | 71 ++++++++++++++++++++ milli/src/documents/primary_key.rs | 2 +- 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/meilisearch/tests/documents/add_documents.rs b/meilisearch/tests/documents/add_documents.rs index b1262fa2d..49e5af95e 100644 --- a/meilisearch/tests/documents/add_documents.rs +++ b/meilisearch/tests/documents/add_documents.rs @@ -1040,6 +1040,77 @@ async fn document_addition_with_primary_key() { "###); } +#[actix_rt::test] +async fn document_addition_with_huge_int_primary_key() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = json!([ + { + 
"primary": 14630868576586246730u64, + "content": "foo", + } + ]); + let (response, code) = index.add_documents(documents, Some("primary")).await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), + @r###" + { + "taskUid": 0, + "indexUid": "test", + "status": "enqueued", + "type": "documentAdditionOrUpdate", + "enqueuedAt": "[date]" + } + "###); + + index.wait_task(0).await; + + let (response, code) = index.get_task(0).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 1 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = index.get().await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), + @r###" + { + "uid": "test", + "createdAt": "[date]", + "updatedAt": "[date]", + "primaryKey": "primary" + } + "###); + + let (response, code) = index.get_document(14630868576586246730u64, None).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), + @r###" + { + "primary": 14630868576586246730, + "content": "foo" + } + "###); +} + #[actix_rt::test] async fn replace_document() { let server = Server::new().await; diff --git a/milli/src/documents/primary_key.rs b/milli/src/documents/primary_key.rs index 29f95beaf..22918f8fc 100644 --- a/milli/src/documents/primary_key.rs +++ b/milli/src/documents/primary_key.rs @@ -166,7 +166,7 @@ pub fn validate_document_id_value(document_id: Value) -> StdResult Ok(s.to_string()), None => Err(UserError::InvalidDocumentId { document_id: Value::String(string) }), }, - Value::Number(number) if number.is_i64() => Ok(number.to_string()), + Value::Number(number) if !number.is_f64() => Ok(number.to_string()), content => Err(UserError::InvalidDocumentId { document_id: content }), } } From 6fb36ed30e9bd3768b9a9686606198029d6172b9 Mon Sep 17 00:00:00 2001 From: JWSong Date: Tue, 25 Jun 2024 23:54:27 +0900 Subject: [PATCH 30/41] get rid of the redundant info in document_addition_with_huge_int_primary_key --- meilisearch/tests/documents/add_documents.rs | 29 ++------------------ 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/meilisearch/tests/documents/add_documents.rs b/meilisearch/tests/documents/add_documents.rs index 49e5af95e..85fc61503 100644 --- a/meilisearch/tests/documents/add_documents.rs +++ b/meilisearch/tests/documents/add_documents.rs @@ -1053,22 +1053,9 @@ async fn document_addition_with_huge_int_primary_key() { ]); let (response, code) = index.add_documents(documents, Some("primary")).await; snapshot!(code, @"202 Accepted"); - snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), - @r###" - { - "taskUid": 0, - "indexUid": "test", - "status": "enqueued", - "type": "documentAdditionOrUpdate", - "enqueuedAt": "[date]" - } - "###); - index.wait_task(0).await; - - let (response, code) = index.get_task(0).await; - snapshot!(code, @"200 OK"); - snapshot!(json_string!(response, { ".duration" => "[duration]", 
".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), + let response = index.wait_task(response.uid()).await; + snapshot!(response, @r###" { "uid": 0, @@ -1088,18 +1075,6 @@ async fn document_addition_with_huge_int_primary_key() { } "###); - let (response, code) = index.get().await; - snapshot!(code, @"200 OK"); - snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), - @r###" - { - "uid": "test", - "createdAt": "[date]", - "updatedAt": "[date]", - "primaryKey": "primary" - } - "###); - let (response, code) = index.get_document(14630868576586246730u64, None).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response), From ab6cac232170498ec892966d0b5f0d280925b7f9 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 19 Jun 2024 10:57:19 +0200 Subject: [PATCH 31/41] specify the rust toolchain --- rust-toolchain.toml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 rust-toolchain.toml diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 000000000..5be51f3c2 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,3 @@ +[toolchain] +channel = "1.78.0" +components = ["clippy"] From 5c758438fc1f5566707925bb384a7ae145636b99 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 25 Jun 2024 11:25:15 +0200 Subject: [PATCH 32/41] Update the CI to take the rust-toolchain file into account --- .github/workflows/bench-manual.yml | 2 -- .github/workflows/bench-pr.yml | 2 -- .github/workflows/bench-push-indexing.yml | 2 -- .github/workflows/benchmarks-manual.yml | 2 -- .github/workflows/benchmarks-pr.yml | 2 -- .github/workflows/benchmarks-push-indexing.yml | 2 -- .../workflows/benchmarks-push-search-geo.yml | 2 -- .../workflows/benchmarks-push-search-songs.yml | 2 -- .../workflows/benchmarks-push-search-wiki.yml | 2 -- .github/workflows/flaky-tests.yml | 3 --- .github/workflows/fuzzer-indexing.yml | 2 -- .github/workflows/publish-apt-brew-pkg.yml | 3 --- .github/workflows/publish-binaries.yml | 10 ---------- .github/workflows/test-suite.yml | 17 +---------------- .github/workflows/update-cargo-toml-version.yml | 2 -- 15 files changed, 1 insertion(+), 54 deletions(-) diff --git a/.github/workflows/bench-manual.yml b/.github/workflows/bench-manual.yml index 6d8c3a006..3352a4b40 100644 --- a/.github/workflows/bench-manual.yml +++ b/.github/workflows/bench-manual.yml @@ -21,8 +21,6 @@ jobs: - uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable - override: true - name: Run benchmarks - workload ${WORKLOAD_NAME} - branch ${{ github.ref }} - commit ${{ github.sha }} run: | diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml index 36af79460..7e8f26a13 100644 --- a/.github/workflows/bench-pr.yml +++ b/.github/workflows/bench-pr.yml @@ -38,8 +38,6 @@ jobs: - uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable - override: true - name: Run benchmarks on PR ${{ github.event.issue.id }} run: | diff --git a/.github/workflows/bench-push-indexing.yml b/.github/workflows/bench-push-indexing.yml index fd0f19a5a..3d92899af 100644 --- a/.github/workflows/bench-push-indexing.yml +++ b/.github/workflows/bench-push-indexing.yml @@ -15,8 +15,6 @@ jobs: - uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable - override: true # Run benchmarks - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }} diff --git a/.github/workflows/benchmarks-manual.yml b/.github/workflows/benchmarks-manual.yml index b967eb073..e24c03d2b 100644 
--- a/.github/workflows/benchmarks-manual.yml +++ b/.github/workflows/benchmarks-manual.yml @@ -21,8 +21,6 @@ jobs: - uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable - override: true # Set variables - name: Set current branch name diff --git a/.github/workflows/benchmarks-pr.yml b/.github/workflows/benchmarks-pr.yml index 30baa294e..322f394e7 100644 --- a/.github/workflows/benchmarks-pr.yml +++ b/.github/workflows/benchmarks-pr.yml @@ -16,8 +16,6 @@ jobs: - uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable - override: true - name: Check for Command id: command diff --git a/.github/workflows/benchmarks-push-indexing.yml b/.github/workflows/benchmarks-push-indexing.yml index a966570e6..e9c589504 100644 --- a/.github/workflows/benchmarks-push-indexing.yml +++ b/.github/workflows/benchmarks-push-indexing.yml @@ -19,8 +19,6 @@ jobs: - uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable - override: true # Set variables - name: Set current branch name diff --git a/.github/workflows/benchmarks-push-search-geo.yml b/.github/workflows/benchmarks-push-search-geo.yml index 1b5cacfd1..652e92311 100644 --- a/.github/workflows/benchmarks-push-search-geo.yml +++ b/.github/workflows/benchmarks-push-search-geo.yml @@ -18,8 +18,6 @@ jobs: - uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable - override: true # Set variables - name: Set current branch name diff --git a/.github/workflows/benchmarks-push-search-songs.yml b/.github/workflows/benchmarks-push-search-songs.yml index 02cd10472..a9b75ba80 100644 --- a/.github/workflows/benchmarks-push-search-songs.yml +++ b/.github/workflows/benchmarks-push-search-songs.yml @@ -18,8 +18,6 @@ jobs: - uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable - override: true # Set variables - name: Set current branch name diff --git a/.github/workflows/benchmarks-push-search-wiki.yml b/.github/workflows/benchmarks-push-search-wiki.yml index 455aaa95d..0c3406e49 100644 --- a/.github/workflows/benchmarks-push-search-wiki.yml +++ b/.github/workflows/benchmarks-push-search-wiki.yml @@ -18,8 +18,6 @@ jobs: - uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable - override: true # Set variables - name: Set current branch name diff --git a/.github/workflows/flaky-tests.yml b/.github/workflows/flaky-tests.yml index c7e81aacc..4c008d6b3 100644 --- a/.github/workflows/flaky-tests.yml +++ b/.github/workflows/flaky-tests.yml @@ -17,9 +17,6 @@ jobs: apt-get update && apt-get install -y curl apt-get install build-essential -y - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - name: Install cargo-flaky run: cargo install cargo-flaky - name: Run cargo flaky in the dumps diff --git a/.github/workflows/fuzzer-indexing.yml b/.github/workflows/fuzzer-indexing.yml index 1d01a6ea5..ceaef57ac 100644 --- a/.github/workflows/fuzzer-indexing.yml +++ b/.github/workflows/fuzzer-indexing.yml @@ -15,8 +15,6 @@ jobs: - uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable - override: true # Run benchmarks - name: Run the fuzzer diff --git a/.github/workflows/publish-apt-brew-pkg.yml b/.github/workflows/publish-apt-brew-pkg.yml index 11893bae0..628a67263 100644 --- a/.github/workflows/publish-apt-brew-pkg.yml +++ b/.github/workflows/publish-apt-brew-pkg.yml @@ -26,9 +26,6 @@ jobs: apt-get update && apt-get install -y curl apt-get install build-essential -y - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true 
- name: Install cargo-deb run: cargo install cargo-deb - uses: actions/checkout@v3 diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index 2372ce497..d9439cb32 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -46,9 +46,6 @@ jobs: apt-get update && apt-get install -y curl apt-get install build-essential -y - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - name: Build run: cargo build --release --locked # No need to upload binaries for dry run (cron) @@ -79,9 +76,6 @@ jobs: steps: - uses: actions/checkout@v3 - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - name: Build run: cargo build --release --locked # No need to upload binaries for dry run (cron) @@ -109,10 +103,8 @@ jobs: - name: Installing Rust toolchain uses: actions-rs/toolchain@v1 with: - toolchain: stable profile: minimal target: ${{ matrix.target }} - override: true - name: Cargo build uses: actions-rs/cargo@v1 with: @@ -156,10 +148,8 @@ jobs: - name: Installing Rust toolchain uses: actions-rs/toolchain@v1 with: - toolchain: stable profile: minimal target: ${{ matrix.target }} - override: true - name: Configure target aarch64 GNU ## Environment variable is not passed using env: ## LD gold won't work with MUSL diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index 5dbde4301..fd9238bf1 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -32,9 +32,6 @@ jobs: apt-get install build-essential -y - name: Setup test with Rust stable uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - name: Cache dependencies uses: Swatinem/rust-cache@v2.7.1 - name: Run cargo check without any default features @@ -60,9 +57,6 @@ jobs: - name: Cache dependencies uses: Swatinem/rust-cache@v2.7.1 - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - name: Run cargo check without any default features uses: actions-rs/cargo@v1 with: @@ -88,9 +82,6 @@ jobs: apt-get update apt-get install --assume-yes build-essential curl - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - name: Run cargo build with almost all features run: | cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)" @@ -111,9 +102,6 @@ jobs: apt-get update apt-get install --assume-yes build-essential curl - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - name: Run cargo tree without default features and check lindera is not present run: | if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -vqz lindera; then @@ -138,9 +126,6 @@ jobs: apt-get update && apt-get install -y curl apt-get install build-essential -y - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - name: Cache dependencies uses: Swatinem/rust-cache@v2.7.1 - name: Run tests in debug @@ -176,7 +161,7 @@ jobs: - uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: nightly + toolchain: nightly-2024-06-25 override: true components: rustfmt - name: Cache dependencies diff --git a/.github/workflows/update-cargo-toml-version.yml b/.github/workflows/update-cargo-toml-version.yml index 51ab6d1ab..0f68bdc49 100644 --- a/.github/workflows/update-cargo-toml-version.yml +++ b/.github/workflows/update-cargo-toml-version.yml @@ -21,8 +21,6 @@ jobs: - uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable - 
override: true - name: Install sd run: cargo install sd - name: Update Cargo.toml file From e16edb2c352b025929401b74de27707b615a0b81 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 25 Jun 2024 17:00:50 +0200 Subject: [PATCH 33/41] use the helix action since the official one doesn't support the rust-toolchain file --- .github/workflows/bench-manual.yml | 2 +- .github/workflows/bench-pr.yml | 2 +- .github/workflows/bench-push-indexing.yml | 2 +- .github/workflows/benchmarks-manual.yml | 2 +- .github/workflows/benchmarks-pr.yml | 2 +- .github/workflows/benchmarks-push-indexing.yml | 2 +- .github/workflows/benchmarks-push-search-geo.yml | 2 +- .github/workflows/benchmarks-push-search-songs.yml | 2 +- .github/workflows/benchmarks-push-search-wiki.yml | 2 +- .github/workflows/flaky-tests.yml | 2 +- .github/workflows/fuzzer-indexing.yml | 2 +- .github/workflows/publish-apt-brew-pkg.yml | 2 +- .github/workflows/publish-binaries.yml | 8 ++++---- .github/workflows/test-suite.yml | 14 +++++++------- .github/workflows/update-cargo-toml-version.yml | 2 +- 15 files changed, 24 insertions(+), 24 deletions(-) diff --git a/.github/workflows/bench-manual.yml b/.github/workflows/bench-manual.yml index 3352a4b40..4a9d5fcfd 100644 --- a/.github/workflows/bench-manual.yml +++ b/.github/workflows/bench-manual.yml @@ -18,7 +18,7 @@ jobs: timeout-minutes: 180 # 3h steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml index 7e8f26a13..6379a528c 100644 --- a/.github/workflows/bench-pr.yml +++ b/.github/workflows/bench-pr.yml @@ -35,7 +35,7 @@ jobs: fetch-depth: 0 # fetch full history to be able to get main commit sha ref: ${{ steps.comment-branch.outputs.head_ref }} - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal diff --git a/.github/workflows/bench-push-indexing.yml b/.github/workflows/bench-push-indexing.yml index 3d92899af..dfd1a3b09 100644 --- a/.github/workflows/bench-push-indexing.yml +++ b/.github/workflows/bench-push-indexing.yml @@ -12,7 +12,7 @@ jobs: timeout-minutes: 180 # 3h steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal diff --git a/.github/workflows/benchmarks-manual.yml b/.github/workflows/benchmarks-manual.yml index e24c03d2b..19d477268 100644 --- a/.github/workflows/benchmarks-manual.yml +++ b/.github/workflows/benchmarks-manual.yml @@ -18,7 +18,7 @@ jobs: timeout-minutes: 4320 # 72h steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal diff --git a/.github/workflows/benchmarks-pr.yml b/.github/workflows/benchmarks-pr.yml index 322f394e7..6a613dcb9 100644 --- a/.github/workflows/benchmarks-pr.yml +++ b/.github/workflows/benchmarks-pr.yml @@ -13,7 +13,7 @@ jobs: runs-on: benchmarks timeout-minutes: 4320 # 72h steps: - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal diff --git a/.github/workflows/benchmarks-push-indexing.yml b/.github/workflows/benchmarks-push-indexing.yml index e9c589504..ae6a4634a 100644 --- a/.github/workflows/benchmarks-push-indexing.yml +++ b/.github/workflows/benchmarks-push-indexing.yml @@ -16,7 +16,7 @@ jobs: timeout-minutes: 4320 # 72h steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal 
diff --git a/.github/workflows/benchmarks-push-search-geo.yml b/.github/workflows/benchmarks-push-search-geo.yml index 652e92311..8f5f8d020 100644 --- a/.github/workflows/benchmarks-push-search-geo.yml +++ b/.github/workflows/benchmarks-push-search-geo.yml @@ -15,7 +15,7 @@ jobs: runs-on: benchmarks steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal diff --git a/.github/workflows/benchmarks-push-search-songs.yml b/.github/workflows/benchmarks-push-search-songs.yml index a9b75ba80..a19990e07 100644 --- a/.github/workflows/benchmarks-push-search-songs.yml +++ b/.github/workflows/benchmarks-push-search-songs.yml @@ -15,7 +15,7 @@ jobs: runs-on: benchmarks steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal diff --git a/.github/workflows/benchmarks-push-search-wiki.yml b/.github/workflows/benchmarks-push-search-wiki.yml index 0c3406e49..f7da07fda 100644 --- a/.github/workflows/benchmarks-push-search-wiki.yml +++ b/.github/workflows/benchmarks-push-search-wiki.yml @@ -15,7 +15,7 @@ jobs: runs-on: benchmarks steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal diff --git a/.github/workflows/flaky-tests.yml b/.github/workflows/flaky-tests.yml index 4c008d6b3..d66417c45 100644 --- a/.github/workflows/flaky-tests.yml +++ b/.github/workflows/flaky-tests.yml @@ -16,7 +16,7 @@ jobs: run: | apt-get update && apt-get install -y curl apt-get install build-essential -y - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 - name: Install cargo-flaky run: cargo install cargo-flaky - name: Run cargo flaky in the dumps diff --git a/.github/workflows/fuzzer-indexing.yml b/.github/workflows/fuzzer-indexing.yml index ceaef57ac..5d1ecc7f8 100644 --- a/.github/workflows/fuzzer-indexing.yml +++ b/.github/workflows/fuzzer-indexing.yml @@ -12,7 +12,7 @@ jobs: timeout-minutes: 4320 # 72h steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal diff --git a/.github/workflows/publish-apt-brew-pkg.yml b/.github/workflows/publish-apt-brew-pkg.yml index 628a67263..91b3ecfba 100644 --- a/.github/workflows/publish-apt-brew-pkg.yml +++ b/.github/workflows/publish-apt-brew-pkg.yml @@ -25,7 +25,7 @@ jobs: run: | apt-get update && apt-get install -y curl apt-get install build-essential -y - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 - name: Install cargo-deb run: cargo install cargo-deb - uses: actions/checkout@v3 diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index d9439cb32..4f475057f 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -45,7 +45,7 @@ jobs: run: | apt-get update && apt-get install -y curl apt-get install build-essential -y - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 - name: Build run: cargo build --release --locked # No need to upload binaries for dry run (cron) @@ -75,7 +75,7 @@ jobs: asset_name: meilisearch-windows-amd64.exe steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 - name: Build run: cargo build --release --locked # No need to upload binaries for dry run (cron) @@ -101,7 +101,7 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 - name: 
Installing Rust toolchain - uses: actions-rs/toolchain@v1 + uses: helix-editor/rust-toolchain@v1 with: profile: minimal target: ${{ matrix.target }} @@ -146,7 +146,7 @@ jobs: add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" apt-get update -y && apt-get install -y docker-ce - name: Installing Rust toolchain - uses: actions-rs/toolchain@v1 + uses: helix-editor/rust-toolchain@v1 with: profile: minimal target: ${{ matrix.target }} diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index fd9238bf1..7205d2ea0 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -31,7 +31,7 @@ jobs: apt-get update && apt-get install -y curl apt-get install build-essential -y - name: Setup test with Rust stable - uses: actions-rs/toolchain@v1 + uses: helix-editor/rust-toolchain@v1 - name: Cache dependencies uses: Swatinem/rust-cache@v2.7.1 - name: Run cargo check without any default features @@ -56,7 +56,7 @@ jobs: - uses: actions/checkout@v3 - name: Cache dependencies uses: Swatinem/rust-cache@v2.7.1 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 - name: Run cargo check without any default features uses: actions-rs/cargo@v1 with: @@ -81,7 +81,7 @@ jobs: run: | apt-get update apt-get install --assume-yes build-essential curl - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 - name: Run cargo build with almost all features run: | cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)" @@ -101,7 +101,7 @@ jobs: run: | apt-get update apt-get install --assume-yes build-essential curl - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 - name: Run cargo tree without default features and check lindera is not present run: | if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -vqz lindera; then @@ -125,7 +125,7 @@ jobs: run: | apt-get update && apt-get install -y curl apt-get install build-essential -y - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 - name: Cache dependencies uses: Swatinem/rust-cache@v2.7.1 - name: Run tests in debug @@ -139,7 +139,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal toolchain: 1.75.0 @@ -158,7 +158,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal toolchain: nightly-2024-06-25 diff --git a/.github/workflows/update-cargo-toml-version.yml b/.github/workflows/update-cargo-toml-version.yml index 0f68bdc49..8b6d0a2d2 100644 --- a/.github/workflows/update-cargo-toml-version.yml +++ b/.github/workflows/update-cargo-toml-version.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal - name: Install sd From 2608a596a069b1b7c1b76c6ee1fae99ee7dbb463 Mon Sep 17 00:00:00 2001 From: karribalu Date: Tue, 25 Jun 2024 18:36:29 +0100 Subject: [PATCH 34/41] Update error message and add tests for incomplete compressed document --- meilisearch/src/error.rs | 2 +- meilisearch/tests/documents/add_documents.rs | 52 ++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs 
index 3a38b36d1..96496a33f 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -111,7 +111,7 @@ impl From for MeilisearchHttpError { #[derive(Debug, thiserror::Error)] pub enum ActixPayloadError { - #[error("The provided payload is incomplete and cannot be decompressed")] + #[error("The provided payload is incomplete and cannot be parsed")] IncompleteError, #[error(transparent)] OtherError(aweb::error::PayloadError), diff --git a/meilisearch/tests/documents/add_documents.rs b/meilisearch/tests/documents/add_documents.rs index b1262fa2d..0169aade0 100644 --- a/meilisearch/tests/documents/add_documents.rs +++ b/meilisearch/tests/documents/add_documents.rs @@ -183,6 +183,58 @@ async fn add_single_document_gzip_encoded() { } "###); } +#[actix_rt::test] +async fn add_single_document_gzip_encoded_with_incomplete_error() { + let document = json!("kefir"); + + // this is a what is expected and should work + let server = Server::new().await; + let app = server.init_web_app().await; + // post + let document = serde_json::to_string(&document).unwrap(); + let req = test::TestRequest::post() + .uri("/indexes/dog/documents") + .set_payload(document.to_string()) + .insert_header(("content-type", "application/json")) + .insert_header(("content-encoding", "gzip")) + .to_request(); + let res = test::call_service(&app, req).await; + let status_code = res.status(); + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + snapshot!(status_code, @"400 Bad Request"); + snapshot!(json_string!(response), + @r###" + { + "message": "The provided payload is incomplete and cannot be parsed", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // put + let req = test::TestRequest::put() + .uri("/indexes/dog/documents") + .set_payload(document.to_string()) + .insert_header(("content-type", "application/json")) + .insert_header(("content-encoding", "gzip")) + .to_request(); + let res = test::call_service(&app, req).await; + let status_code = res.status(); + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + snapshot!(status_code, @"400 Bad Request"); + snapshot!(json_string!(response), + @r###" + { + "message": "The provided payload is incomplete and cannot be parsed", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); +} /// Here we try document request with every encoding #[actix_rt::test] From 1374b661d1abe008804f27ea8690541ed43b9609 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 26 Jun 2024 09:14:59 +0200 Subject: [PATCH 35/41] fix a possibly flaky test --- meilisearch/tests/tasks/mod.rs | 38 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/meilisearch/tests/tasks/mod.rs b/meilisearch/tests/tasks/mod.rs index ed387224e..89e6ff2c8 100644 --- a/meilisearch/tests/tasks/mod.rs +++ b/meilisearch/tests/tasks/mod.rs @@ -2,6 +2,7 @@ mod errors; mod webhook; use meili_snap::insta::assert_json_snapshot; +use meili_snap::snapshot; use time::format_description::well_known::Rfc3339; use time::OffsetDateTime; @@ -738,11 +739,9 @@ async fn test_summarized_index_creation() { async fn test_summarized_index_deletion() { let server = Server::new().await; let index = server.index("test"); - index.delete().await; - index.wait_task(0).await; - let (task, _) = index.get_task(0).await; - 
assert_json_snapshot!(task, - { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + let (ret, _code) = index.delete().await; + let task = index.wait_task(ret.uid()).await; + snapshot!(task, @r###" { "uid": 0, @@ -768,11 +767,9 @@ async fn test_summarized_index_deletion() { // is the details correctly set when documents are actually deleted. index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await; - index.delete().await; - index.wait_task(2).await; - let (task, _) = index.get_task(2).await; - assert_json_snapshot!(task, - { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + let (ret, _code) = index.delete().await; + let task = index.wait_task(ret.uid()).await; + snapshot!(task, @r###" { "uid": 2, @@ -792,22 +789,25 @@ async fn test_summarized_index_deletion() { "###); // What happens when you delete an index that doesn't exists. - index.delete().await; - index.wait_task(2).await; - let (task, _) = index.get_task(2).await; - assert_json_snapshot!(task, - { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + let (ret, _code) = index.delete().await; + let task = index.wait_task(ret.uid()).await; + snapshot!(task, @r###" { - "uid": 2, + "uid": 3, "indexUid": "test", - "status": "succeeded", + "status": "failed", "type": "indexDeletion", "canceledBy": null, "details": { - "deletedDocuments": 1 + "deletedDocuments": 0 + }, + "error": { + "message": "Index `test` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" }, - "error": null, "duration": "[duration]", "enqueuedAt": "[date]", "startedAt": "[date]", From 3d6b61d8d2b11b7a10190202d91bce0271162efd Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 26 Jun 2024 09:19:51 +0200 Subject: [PATCH 36/41] fix flakyness for real --- meilisearch/tests/tasks/mod.rs | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/meilisearch/tests/tasks/mod.rs b/meilisearch/tests/tasks/mod.rs index 89e6ff2c8..f2ed76b6a 100644 --- a/meilisearch/tests/tasks/mod.rs +++ b/meilisearch/tests/tasks/mod.rs @@ -766,7 +766,31 @@ async fn test_summarized_index_deletion() { "###); // is the details correctly set when documents are actually deleted. - index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await; + // /!\ We need to wait for the document addition to be processed otherwise, if the test runs too slow, + // both tasks may get autobatched and the deleted documents count will be wrong. 
+ let (ret, _code) = + index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await; + let task = index.wait_task(ret.uid()).await; + snapshot!(task, + @r###" + { + "uid": 1, + "indexUid": "test", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 1 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + let (ret, _code) = index.delete().await; let task = index.wait_task(ret.uid()).await; snapshot!(task, From 544e98ca992a0ee00d306cb495c085c23ce5c129 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 26 Jun 2024 13:58:25 +0200 Subject: [PATCH 37/41] use teh current version for clippy --- .github/workflows/test-suite.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index 7205d2ea0..c5e022bd7 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -142,8 +142,6 @@ jobs: - uses: helix-editor/rust-toolchain@v1 with: profile: minimal - toolchain: 1.75.0 - override: true components: clippy - name: Cache dependencies uses: Swatinem/rust-cache@v2.7.1 From a1dcde6b9aadc14102ca17d26ab9770d373bf889 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 26 Jun 2024 14:00:21 +0200 Subject: [PATCH 38/41] Update meilisearch/src/extractors/authentication/mod.rs Co-authored-by: Many the fish --- meilisearch/src/extractors/authentication/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/meilisearch/src/extractors/authentication/mod.rs b/meilisearch/src/extractors/authentication/mod.rs index ecd1cadf8..28a6d770e 100644 --- a/meilisearch/src/extractors/authentication/mod.rs +++ b/meilisearch/src/extractors/authentication/mod.rs @@ -277,7 +277,8 @@ pub mod policies { index: index.to_string(), }); } else { - // Otherwise we can share the + // Otherwise we can share the list + // of authorized indexes in the API key. return Err(AuthError::ApiKeyAccessingnUnauthorizedIndex { index: index.to_string(), allowed: auth_filter.api_key_list_index_authorized(), From e28332a904e78a98642fe7a9fd3527c0e749dbe5 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 26 Jun 2024 14:01:28 +0200 Subject: [PATCH 39/41] set the rust toolchain to the v1.75.0 --- rust-toolchain.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 5be51f3c2..4739bf10a 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "1.78.0" +channel = "1.75.0" components = ["clippy"] From eb292a7a62d63d986a689a84be557ea80e3c3dd9 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 26 Jun 2024 10:00:54 +0200 Subject: [PATCH 40/41] Fix the missing geo distance when one or both of the lat / lng are string --- meilisearch/src/search.rs | 62 ++++++++++++++++++++++++++++++++- meilisearch/tests/search/geo.rs | 3 +- 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 0c2c49452..7f258b952 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -1150,13 +1150,23 @@ fn insert_geo_distance(sorts: &[String], document: &mut Document) { // TODO: TAMO: milli encountered an internal error, what do we want to do? 
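        // A rough standalone sketch of the behaviour this hunk introduces further
        // down (assuming serde_json's `Value` and `json!` are in scope): lat/lng
        // values are now taken into account whether they arrive as JSON numbers or
        // as numeric strings.
        //
        //     fn extract_geo_value(value: &Value) -> Option<f64> {
        //         match value {
        //             Value::Number(n) => n.as_f64(),
        //             Value::String(s) => s.parse().ok(),
        //             _ => None,
        //         }
        //     }
        //
        //     assert_eq!(extract_geo_value(&json!(50)), Some(50.0));
        //     assert_eq!(extract_geo_value(&json!("50")), Some(50.0));
        //     assert_eq!(extract_geo_value(&json!(null)), None);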
let base = [capture_group[1].parse().unwrap(), capture_group[2].parse().unwrap()]; let geo_point = &document.get("_geo").unwrap_or(&json!(null)); - if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) { + if let Some((lat, lng)) = + extract_geo_value(&geo_point["lat"]).zip(extract_geo_value(&geo_point["lng"])) + { let distance = milli::distance_between_two_points(&base, &[lat, lng]); document.insert("_geoDistance".to_string(), json!(distance.round() as usize)); } } } +fn extract_geo_value(value: &Value) -> Option { + match value { + Value::Number(n) => n.as_f64(), + Value::String(s) => s.parse().ok(), + _ => None, + } +} + fn compute_formatted_options( attr_to_highlight: &HashSet, attr_to_crop: &[String], @@ -1530,4 +1540,54 @@ mod test { insert_geo_distance(sorters, &mut document); assert_eq!(document.get("_geoDistance"), None); } + + #[test] + fn test_insert_geo_distance_with_coords_as_string() { + let value: Document = serde_json::from_str( + r#"{ + "_geo": { + "lat": "50", + "lng": 3 + } + }"#, + ) + .unwrap(); + + let sorters = &["_geoPoint(50,3):desc".to_string()]; + let mut document = value.clone(); + insert_geo_distance(sorters, &mut document); + assert_eq!(document.get("_geoDistance"), Some(&json!(0))); + + let value: Document = serde_json::from_str( + r#"{ + "_geo": { + "lat": "50", + "lng": "3" + }, + "id": "1" + }"#, + ) + .unwrap(); + + let sorters = &["_geoPoint(50,3):desc".to_string()]; + let mut document = value.clone(); + insert_geo_distance(sorters, &mut document); + assert_eq!(document.get("_geoDistance"), Some(&json!(0))); + + let value: Document = serde_json::from_str( + r#"{ + "_geo": { + "lat": 50, + "lng": "3" + }, + "id": "1" + }"#, + ) + .unwrap(); + + let sorters = &["_geoPoint(50,3):desc".to_string()]; + let mut document = value.clone(); + insert_geo_distance(sorters, &mut document); + assert_eq!(document.get("_geoDistance"), Some(&json!(0))); + } } diff --git a/meilisearch/tests/search/geo.rs b/meilisearch/tests/search/geo.rs index 8754453ba..7804f1ad0 100644 --- a/meilisearch/tests/search/geo.rs +++ b/meilisearch/tests/search/geo.rs @@ -150,7 +150,8 @@ async fn bug_4640() { "_geo": { "lat": "45.4777599", "lng": "9.1967508" - } + }, + "_geoDistance": 0 }, { "id": 1, From a108d8f6f3b24c5583630961029df2942e840f3c Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 17 Jun 2024 16:21:51 +0200 Subject: [PATCH 41/41] update yaup --- Cargo.lock | 15 ++++++++------- meilisearch/Cargo.toml | 3 +-- meilisearch/tests/common/index.rs | 4 ++-- meilisearch/tests/search/errors.rs | 22 +++++++++++----------- meilisearch/tests/similar/errors.rs | 4 ++-- 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e31943cf3..f20d2f289 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5063,18 +5063,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.58" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.58" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", @@ -6090,12 +6090,13 @@ dependencies = [ 
[[package]] name = "yaup" -version = "0.2.1" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a59e7d27bed43f7c37c25df5192ea9d435a8092a902e02203359ac9ce3e429d9" +checksum = "b0144f1a16a199846cb21024da74edd930b43443463292f536b7110b4855b5c6" dependencies = [ + "form_urlencoded", "serde", - "url", + "thiserror", ] [[package]] diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index eacd3dd8e..4ce2b5fb3 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -98,7 +98,6 @@ tokio-stream = "0.1.14" toml = "0.8.8" uuid = { version = "1.6.1", features = ["serde", "v4"] } walkdir = "2.4.0" -yaup = "0.2.1" serde_urlencoded = "0.7.1" termcolor = "1.4.1" url = { version = "2.5.0", features = ["serde"] } @@ -118,7 +117,7 @@ maplit = "1.0.2" meili-snap = { path = "../meili-snap" } temp-env = "0.3.6" urlencoding = "2.1.3" -yaup = "0.2.1" +yaup = "0.3.1" [build-dependencies] anyhow = { version = "1.0.79", optional = true } diff --git a/meilisearch/tests/common/index.rs b/meilisearch/tests/common/index.rs index 3ac33b4e9..f914f8dc8 100644 --- a/meilisearch/tests/common/index.rs +++ b/meilisearch/tests/common/index.rs @@ -376,7 +376,7 @@ impl Index<'_> { } pub async fn search_get(&self, query: &str) -> (Value, StatusCode) { - let url = format!("/indexes/{}/search?{}", urlencode(self.uid.as_ref()), query); + let url = format!("/indexes/{}/search{}", urlencode(self.uid.as_ref()), query); self.service.get(url).await } @@ -413,7 +413,7 @@ impl Index<'_> { } pub async fn similar_get(&self, query: &str) -> (Value, StatusCode) { - let url = format!("/indexes/{}/similar?{}", urlencode(self.uid.as_ref()), query); + let url = format!("/indexes/{}/similar{}", urlencode(self.uid.as_ref()), query); self.service.get(url).await } diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index cce1a86e7..0119be59e 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -71,7 +71,7 @@ async fn search_bad_offset() { } "###); - let (response, code) = index.search_get("offset=doggo").await; + let (response, code) = index.search_get("?offset=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -99,7 +99,7 @@ async fn search_bad_limit() { } "###); - let (response, code) = index.search_get("limit=doggo").await; + let (response, code) = index.search_get("?limit=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -127,7 +127,7 @@ async fn search_bad_page() { } "###); - let (response, code) = index.search_get("page=doggo").await; + let (response, code) = index.search_get("?page=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -155,7 +155,7 @@ async fn search_bad_hits_per_page() { } "###); - let (response, code) = index.search_get("hitsPerPage=doggo").await; + let (response, code) = index.search_get("?hitsPerPage=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -201,7 +201,7 @@ async fn search_bad_crop_length() { } "###); - let (response, code) = index.search_get("cropLength=doggo").await; + let (response, code) = index.search_get("?cropLength=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -291,7 +291,7 @@ async fn search_bad_show_matches_position() { } "###); - let (response, code) = index.search_get("showMatchesPosition=doggo").await; + let (response, code) = 
index.search_get("?showMatchesPosition=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -340,7 +340,7 @@ async fn search_non_filterable_facets() { } "###); - let (response, code) = index.search_get("facets=doggo").await; + let (response, code) = index.search_get("?facets=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -370,7 +370,7 @@ async fn search_non_filterable_facets_multiple_filterable() { } "###); - let (response, code) = index.search_get("facets=doggo").await; + let (response, code) = index.search_get("?facets=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -400,7 +400,7 @@ async fn search_non_filterable_facets_no_filterable() { } "###); - let (response, code) = index.search_get("facets=doggo").await; + let (response, code) = index.search_get("?facets=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -430,7 +430,7 @@ async fn search_non_filterable_facets_multiple_facets() { } "###); - let (response, code) = index.search_get("facets=doggo,neko").await; + let (response, code) = index.search_get("?facets=doggo,neko").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -523,7 +523,7 @@ async fn search_bad_matching_strategy() { } "###); - let (response, code) = index.search_get("matchingStrategy=doggo").await; + let (response, code) = index.search_get("?matchingStrategy=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { diff --git a/meilisearch/tests/similar/errors.rs b/meilisearch/tests/similar/errors.rs index 64386a7bf..a6cb3cdbc 100644 --- a/meilisearch/tests/similar/errors.rs +++ b/meilisearch/tests/similar/errors.rs @@ -179,7 +179,7 @@ async fn similar_bad_offset() { } "###); - let (response, code) = index.similar_get("id=287947&offset=doggo").await; + let (response, code) = index.similar_get("?id=287947&offset=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -221,7 +221,7 @@ async fn similar_bad_limit() { } "###); - let (response, code) = index.similar_get("id=287946&limit=doggo").await; + let (response, code) = index.similar_get("?id=287946&limit=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" {