From 8095f21999965429e46b6e12d2bc1ffab83414c8 Mon Sep 17 00:00:00 2001
From: Jakub Jirutka
Date: Sat, 6 May 2023 18:10:54 +0200
Subject: [PATCH 01/73] Move comments above keys in config.toml

The current style is very unusual and confusing, and it breaks
compatibility with tools that parse config files together with their
comments. Almost everyone writes comments above the items they refer to
(except perhaps Pythonistas), so let's stick to that convention.
---
 config.toml | 52 ++++++++++++++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/config.toml b/config.toml
index 71087f25f..71872d0d4 100644
--- a/config.toml
+++ b/config.toml
@@ -1,128 +1,128 @@
 # This file shows the default configuration of Meilisearch.
 # All variables are defined here: https://www.meilisearch.com/docs/learn/configuration/instance_options#environment-variables

-db_path = "./data.ms"
 # Designates the location where database files will be created and retrieved.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#database-path
+db_path = "./data.ms"

-env = "development"
 # Configures the instance's environment. Value must be either `production` or `development`.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#environment
+env = "development"

-http_addr = "localhost:7700"
 # The address on which the HTTP server will listen.
+http_addr = "localhost:7700"

-# master_key = "YOUR_MASTER_KEY_VALUE"
 # Sets the instance's master key, automatically protecting all routes except GET /health.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#master-key
+# master_key = "YOUR_MASTER_KEY_VALUE"

-# no_analytics = true
 # Deactivates Meilisearch's built-in telemetry when provided.
 # Meilisearch automatically collects data from all instances that do not opt out using this flag.
 # All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted at any time.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#disable-analytics
+# no_analytics = true

-http_payload_size_limit = "100 MB"
 # Sets the maximum size of accepted payloads.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#payload-limit-size
+http_payload_size_limit = "100 MB"

-log_level = "INFO"
 # Defines how much detail should be present in Meilisearch's logs.
 # Meilisearch currently supports six log levels, listed in order of increasing verbosity: `OFF`, `ERROR`, `WARN`, `INFO`, `DEBUG`, `TRACE`
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#log-level
+log_level = "INFO"

-# max_indexing_memory = "2 GiB"
 # Sets the maximum amount of RAM Meilisearch can use when indexing.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-memory
+# max_indexing_memory = "2 GiB"

-# max_indexing_threads = 4
 # Sets the maximum number of threads Meilisearch can use during indexing.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-threads
+# max_indexing_threads = 4

 #############
 ### DUMPS ###
 #############

-dump_dir = "dumps/"
 # Sets the directory where Meilisearch will create dump files.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#dump-directory
+dump_dir = "dumps/"

-# import_dump = "./path/to/my/file.dump"
 # Imports the dump file located at the specified path. Path must point to a .dump file.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#import-dump
+# import_dump = "./path/to/my/file.dump"

-ignore_missing_dump = false
 # Prevents Meilisearch from throwing an error when `import_dump` does not point to a valid dump file.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-dump
+ignore_missing_dump = false

-ignore_dump_if_db_exists = false
 # Prevents a Meilisearch instance with an existing database from throwing an error when using `import_dump`.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-dump-if-db-exists
+ignore_dump_if_db_exists = false

 #################
 ### SNAPSHOTS ###
 #################

-schedule_snapshot = false
 # Enables scheduled snapshots when true, disable when false (the default).
 # If the value is given as an integer, then enables the scheduled snapshot with the passed value as the interval
 # between each snapshot, in seconds.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#schedule-snapshot-creation
+schedule_snapshot = false

-snapshot_dir = "snapshots/"
 # Sets the directory where Meilisearch will store snapshots.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#snapshot-destination
+snapshot_dir = "snapshots/"

-# import_snapshot = "./path/to/my/snapshot"
 # Launches Meilisearch after importing a previously-generated snapshot at the given filepath.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#import-snapshot
+# import_snapshot = "./path/to/my/snapshot"

-ignore_missing_snapshot = false
 # Prevents a Meilisearch instance from throwing an error when `import_snapshot` does not point to a valid snapshot file.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-snapshot
+ignore_missing_snapshot = false

-ignore_snapshot_if_db_exists = false
 # Prevents a Meilisearch instance with an existing database from throwing an error when using `import_snapshot`.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-snapshot-if-db-exists
+ignore_snapshot_if_db_exists = false

 ###########
 ### SSL ###
 ###########

-# ssl_auth_path = "./path/to/root"
 # Enables client authentication in the specified path.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-authentication-path
+# ssl_auth_path = "./path/to/root"

-# ssl_cert_path = "./path/to/certfile"
 # Sets the server's SSL certificates.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-certificates-path
+# ssl_cert_path = "./path/to/certfile"

-# ssl_key_path = "./path/to/private-key"
 # Sets the server's SSL key files.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-key-path
+# ssl_key_path = "./path/to/private-key"

-# ssl_ocsp_path = "./path/to/ocsp-file"
 # Sets the server's OCSP file.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-ocsp-path
+# ssl_ocsp_path = "./path/to/ocsp-file"

-ssl_require_auth = false
 # Makes SSL authentication mandatory.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-require-auth
+ssl_require_auth = false

-ssl_resumption = false
 # Activates SSL session resumption.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-resumption
+ssl_resumption = false

-ssl_tickets = false
 # Activates SSL tickets.
 # https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-tickets
+ssl_tickets = false

 #############################
 ### Experimental features ###
 #############################

-experimental_enable_metrics = false
 # Experimental metrics feature. For more information, see:
 # Enables the Prometheus metrics on the `GET /metrics` endpoint.
+experimental_enable_metrics = false

From 36253890575b9446d0e2d07bbd66cf55f4ba8ac1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?=
Date: Wed, 3 May 2023 13:39:19 +0200
Subject: [PATCH 02/73] Highlight ngram matches as well

---
 milli/src/search/new/mod.rs         |  4 ++--
 milli/src/search/new/query_graph.rs | 14 ++++++++++----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs
index 7e8426bf9..cbc085b12 100644
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@@ -397,8 +397,8 @@ pub fn execute_search(
         None
     };
     let bucket_sort_output = if let Some(query_terms) = query_terms {
-        let graph = QueryGraph::from_query(ctx, &query_terms)?;
-        located_query_terms = Some(query_terms);
+        let (graph, new_located_query_terms) = QueryGraph::from_query(ctx, &query_terms)?;
+        located_query_terms = Some(new_located_query_terms);

         let ranking_rules = get_ranking_rules_for_query_graph_search(
             ctx,
diff --git a/milli/src/search/new/query_graph.rs b/milli/src/search/new/query_graph.rs
index 0e7d5a7f3..dc25d1bc3 100644
--- a/milli/src/search/new/query_graph.rs
+++ b/milli/src/search/new/query_graph.rs
@@ -88,12 +88,15 @@ pub struct QueryGraph {
 }

 impl QueryGraph {
-    /// Build the query graph from the parsed user search query.
+    /// Build the query graph from the parsed user search query, return an updated list of the located query terms
+    /// which contains ngrams.
     pub fn from_query(
         ctx: &mut SearchContext,
         // NOTE: the terms here must be consecutive
         terms: &[LocatedQueryTerm],
-    ) -> Result<QueryGraph> {
+    ) -> Result<(QueryGraph, Vec<LocatedQueryTerm>)> {
+        let mut new_located_query_terms = terms.to_vec();
+
         let nbr_typos = number_of_typos_allowed(ctx)?;

         let mut nodes_data: Vec<QueryNodeData> = vec![QueryNodeData::Start, QueryNodeData::End];
@@ -107,10 +110,11 @@
         let original_terms_len = terms.len();
         for term_idx in 0..original_terms_len {
             let mut new_nodes = vec![];
+
             let new_node_idx = add_node(
                 &mut nodes_data,
                 QueryNodeData::Term(LocatedQueryTermSubset {
-                    term_subset: QueryTermSubset::full(Interned::from_raw(term_idx as u16)),
+                    term_subset: QueryTermSubset::full(terms[term_idx].value),
                     positions: terms[term_idx].positions.clone(),
                     term_ids: term_idx as u8..=term_idx as u8,
                 }),
@@ -121,6 +125,7 @@
             if let Some(ngram) =
                 query_term::make_ngram(ctx, &terms[term_idx - 1..=term_idx], &nbr_typos)?
             {
+                new_located_query_terms.push(ngram.clone());
                 let ngram_idx = add_node(
                     &mut nodes_data,
                     QueryNodeData::Term(LocatedQueryTermSubset {
@@ -136,6 +141,7 @@
             if let Some(ngram) =
                 query_term::make_ngram(ctx, &terms[term_idx - 2..=term_idx], &nbr_typos)?
             {
+                new_located_query_terms.push(ngram.clone());
                 let ngram_idx = add_node(
                     &mut nodes_data,
                     QueryNodeData::Term(LocatedQueryTermSubset {
@@ -167,7 +173,7 @@
         let mut graph = QueryGraph { root_node, end_node, nodes };
         graph.build_initial_edges();

-        Ok(graph)
+        Ok((graph, new_located_query_terms))
     }

     /// Remove the given nodes, connecting all their predecessors to all their successors.
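
A minimal sketch of what this patch buys the highlighter, mirroring the
call site patched in milli/src/search/new/mod.rs above; the query
"new york" and the merged term "newyork" are hypothetical examples, not
values taken from the patch:

    // `from_query` now also returns the located terms *including* any
    // ngrams that `make_ngram` created while building the graph (e.g.
    // "new" + "york" merged into the hypothetical ngram "newyork").
    let (graph, new_located_query_terms) = QueryGraph::from_query(ctx, &query_terms)?;
    // Storing the extended list is what lets documents matched through an
    // ngram be highlighted as well: the matching words are derived from
    // `located_query_terms`, which previously never contained ngrams.
    located_query_terms = Some(new_located_query_terms);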
From d08f8690d2a8a960292117cfb4ae2a0b6d83ef5f Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 9 May 2023 19:52:11 +0200 Subject: [PATCH 03/73] add analytics on the get documents resource --- meilisearch/src/analytics/mock_analytics.rs | 4 +- meilisearch/src/analytics/mod.rs | 12 +++ .../src/analytics/segment_analytics.rs | 102 +++++++++++++++++- meilisearch/src/routes/indexes/documents.rs | 32 +++++- 4 files changed, 146 insertions(+), 4 deletions(-) diff --git a/meilisearch/src/analytics/mock_analytics.rs b/meilisearch/src/analytics/mock_analytics.rs index 03aed0189..68c3a7dff 100644 --- a/meilisearch/src/analytics/mock_analytics.rs +++ b/meilisearch/src/analytics/mock_analytics.rs @@ -5,7 +5,7 @@ use actix_web::HttpRequest; use meilisearch_types::InstanceUid; use serde_json::Value; -use super::{find_user_id, Analytics, DocumentDeletionKind}; +use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind}; use crate::routes::indexes::documents::UpdateDocumentsQuery; use crate::routes::tasks::TasksFilterQuery; use crate::Opt; @@ -71,6 +71,8 @@ impl Analytics for MockAnalytics { _request: &HttpRequest, ) { } + fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {} + fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {} fn get_tasks(&self, _query: &TasksFilterQuery, _request: &HttpRequest) {} fn health_seen(&self, _request: &HttpRequest) {} } diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 6223b9db7..c48564dff 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -67,6 +67,12 @@ pub enum DocumentDeletionKind { PerFilter, } +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum DocumentFetchKind { + PerDocumentId, + Normal { with_filter: bool, limit: usize, offset: usize }, +} + pub trait Analytics: Sync + Send { fn instance_uid(&self) -> Option<&InstanceUid>; @@ -90,6 +96,12 @@ pub trait Analytics: Sync + Send { request: &HttpRequest, ); + // this method should be called to aggregate a fetch documents request + fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest); + + // this method should be called to aggregate a fetch documents request + fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest); + // this method should be called to aggregate a add documents request fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest); diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 3e40c09e8..d640c4ef0 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -23,7 +23,9 @@ use tokio::select; use tokio::sync::mpsc::{self, Receiver, Sender}; use uuid::Uuid; -use super::{config_user_id_path, DocumentDeletionKind, MEILISEARCH_CONFIG_PATH}; +use super::{ + config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH, +}; use crate::analytics::Analytics; use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot}; use crate::routes::indexes::documents::UpdateDocumentsQuery; @@ -72,6 +74,8 @@ pub enum AnalyticsMsg { AggregateAddDocuments(DocumentsAggregator), AggregateDeleteDocuments(DocumentsDeletionAggregator), AggregateUpdateDocuments(DocumentsAggregator), + AggregateGetFetchDocuments(DocumentsFetchAggregator), + AggregatePostFetchDocuments(DocumentsFetchAggregator), 
AggregateTasks(TasksAggregator), AggregateHealth(HealthAggregator), } @@ -139,6 +143,8 @@ impl SegmentAnalytics { add_documents_aggregator: DocumentsAggregator::default(), delete_documents_aggregator: DocumentsDeletionAggregator::default(), update_documents_aggregator: DocumentsAggregator::default(), + get_fetch_documents_aggregator: DocumentsFetchAggregator::default(), + post_fetch_documents_aggregator: DocumentsFetchAggregator::default(), get_tasks_aggregator: TasksAggregator::default(), health_aggregator: HealthAggregator::default(), }); @@ -205,6 +211,16 @@ impl super::Analytics for SegmentAnalytics { let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate)); } + fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) { + let aggregate = DocumentsFetchAggregator::from_query(documents_query, request); + let _ = self.sender.try_send(AnalyticsMsg::AggregateGetFetchDocuments(aggregate)); + } + + fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) { + let aggregate = DocumentsFetchAggregator::from_query(documents_query, request); + let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFetchDocuments(aggregate)); + } + fn get_tasks(&self, query: &TasksFilterQuery, request: &HttpRequest) { let aggregate = TasksAggregator::from_query(query, request); let _ = self.sender.try_send(AnalyticsMsg::AggregateTasks(aggregate)); @@ -338,6 +354,8 @@ pub struct Segment { add_documents_aggregator: DocumentsAggregator, delete_documents_aggregator: DocumentsDeletionAggregator, update_documents_aggregator: DocumentsAggregator, + get_fetch_documents_aggregator: DocumentsFetchAggregator, + post_fetch_documents_aggregator: DocumentsFetchAggregator, get_tasks_aggregator: TasksAggregator, health_aggregator: HealthAggregator, } @@ -400,6 +418,8 @@ impl Segment { Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg), + Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg), + Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateTasks(agreg)) => self.get_tasks_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateHealth(agreg)) => self.health_aggregator.aggregate(agreg), None => (), @@ -450,6 +470,10 @@ impl Segment { .into_event(&self.user, "Documents Deleted"); let update_documents = std::mem::take(&mut self.update_documents_aggregator) .into_event(&self.user, "Documents Updated"); + let get_fetch_documents = std::mem::take(&mut self.get_fetch_documents_aggregator) + .into_event(&self.user, "Documents Fetched GET"); + let post_fetch_documents = std::mem::take(&mut self.post_fetch_documents_aggregator) + .into_event(&self.user, "Documents Fetched POST"); let get_tasks = std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen"); let health = @@ -473,6 +497,12 @@ impl Segment { if let Some(update_documents) = update_documents { let _ = self.batcher.push(update_documents).await; } + if let Some(get_fetch_documents) = get_fetch_documents { + let _ = self.batcher.push(get_fetch_documents).await; + } + if let Some(post_fetch_documents) = post_fetch_documents { + let _ = 
self.batcher.push(post_fetch_documents).await; + } if let Some(get_tasks) = get_tasks { let _ = self.batcher.push(get_tasks).await; } @@ -1135,3 +1165,73 @@ impl HealthAggregator { }) } } + +#[derive(Default, Serialize)] +pub struct DocumentsFetchAggregator { + #[serde(skip)] + timestamp: Option, + + // context + #[serde(rename = "user-agent")] + user_agents: HashSet, + + total_received: usize, + + // a call on ../documents/:doc_id + per_document_id: bool, + // if a filter was used + per_filter: bool, + + // pagination + max_limit: usize, + max_offset: usize, +} + +impl DocumentsFetchAggregator { + pub fn from_query(query: &DocumentFetchKind, request: &HttpRequest) -> Self { + let (limit, offset) = match query { + DocumentFetchKind::PerDocumentId => (1, 0), + DocumentFetchKind::Normal { limit, offset, .. } => (*limit, *offset), + }; + Self { + timestamp: Some(OffsetDateTime::now_utc()), + user_agents: extract_user_agents(request).into_iter().collect(), + total_received: 1, + per_document_id: matches!(query, DocumentFetchKind::PerDocumentId), + per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter), + max_limit: limit, + max_offset: offset, + } + } + + /// Aggregate one [DocumentsFetchAggregator] into another. + pub fn aggregate(&mut self, other: Self) { + if self.timestamp.is_none() { + self.timestamp = other.timestamp; + } + for user_agent in other.user_agents { + self.user_agents.insert(user_agent); + } + + self.total_received = self.total_received.saturating_add(other.total_received); + self.per_document_id |= other.per_document_id; + self.per_filter |= other.per_filter; + + self.max_limit = self.max_limit.max(other.max_limit); + self.max_offset |= self.max_offset.max(other.max_offset); + } + + pub fn into_event(self, user: &User, event_name: &str) -> Option { + // if we had no timestamp it means we never encountered any events and + // thus we don't need to send this event. 
+ let timestamp = self.timestamp?; + + Some(Track { + timestamp: Some(timestamp), + user: user.clone(), + event: event_name.to_string(), + properties: serde_json::to_value(self).ok()?, + ..Default::default() + }) + } +} diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index eb0f5a59e..dcc4ed04f 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -29,7 +29,7 @@ use tempfile::tempfile; use tokio::fs::File; use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter}; -use crate::analytics::{Analytics, DocumentDeletionKind}; +use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind}; use crate::error::MeilisearchHttpError; use crate::error::PayloadError::ReceivePayload; use crate::extractors::authentication::policies::*; @@ -97,10 +97,14 @@ pub async fn get_document( index_scheduler: GuardedData, Data>, document_param: web::Path, params: AwebQueryParameter, + req: HttpRequest, + analytics: web::Data, ) -> Result { let DocumentParam { index_uid, document_id } = document_param.into_inner(); let index_uid = IndexUid::try_from(index_uid)?; + analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req); + let GetDocument { fields } = params.into_inner(); let attributes_to_retrieve = fields.merge_star_and_none(); @@ -161,16 +165,31 @@ pub async fn documents_by_query_post( index_scheduler: GuardedData, Data>, index_uid: web::Path, body: AwebJson, + req: HttpRequest, + analytics: web::Data, ) -> Result { debug!("called with body: {:?}", body); - documents_by_query(&index_scheduler, index_uid, body.into_inner()) + let body = body.into_inner(); + + analytics.post_fetch_documents( + &DocumentFetchKind::Normal { + with_filter: body.filter.is_some(), + limit: body.limit, + offset: body.offset, + }, + &req, + ); + + documents_by_query(&index_scheduler, index_uid, body) } pub async fn get_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, params: AwebQueryParameter, + req: HttpRequest, + analytics: web::Data, ) -> Result { debug!("called with params: {:?}", params); @@ -191,6 +210,15 @@ pub async fn get_documents( filter, }; + analytics.get_fetch_documents( + &DocumentFetchKind::Normal { + with_filter: query.filter.is_some(), + limit: query.limit, + offset: query.offset, + }, + &req, + ); + documents_by_query(&index_scheduler, index_uid, query) } From c42a65a29714a8d0c2d89b63ed53533383f829c6 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 10 May 2023 10:55:56 +0200 Subject: [PATCH 04/73] Update meilisearch/src/analytics/segment_analytics.rs Co-authored-by: Louis Dureuil --- meilisearch/src/analytics/segment_analytics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index d640c4ef0..64adc389d 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -1218,7 +1218,7 @@ impl DocumentsFetchAggregator { self.per_filter |= other.per_filter; self.max_limit = self.max_limit.max(other.max_limit); - self.max_offset |= self.max_offset.max(other.max_offset); + self.max_offset = self.max_offset.max(other.max_offset); } pub fn into_event(self, user: &User, event_name: &str) -> Option { From 46ec8a97e984b6d5e49421c982bfc2988f8bd31e Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 10 May 2023 14:02:42 +0200 Subject: [PATCH 05/73] rename the analytics according to the spec --- 
meilisearch/src/analytics/segment_analytics.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 64adc389d..b64ec9223 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -1175,6 +1175,7 @@ pub struct DocumentsFetchAggregator { #[serde(rename = "user-agent")] user_agents: HashSet, + #[serde(rename = "requests.max_limit")] total_received: usize, // a call on ../documents/:doc_id @@ -1183,7 +1184,9 @@ pub struct DocumentsFetchAggregator { per_filter: bool, // pagination + #[serde(rename = "pagination.max_limit")] max_limit: usize, + #[serde(rename = "pagination.max_offset")] max_offset: usize, } From c4a40e711066c88ca0c61579284d9ff0c463e8af Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 15 May 2023 10:15:33 +0200 Subject: [PATCH 06/73] Use the writemap flag to reduce the memory usage --- Cargo.lock | 6 +-- index-scheduler/src/index_mapper/mod.rs | 9 +++- index-scheduler/src/lib.rs | 27 +++++++---- meilisearch-auth/src/store.rs | 6 ++- milli/Cargo.toml | 2 +- milli/src/index.rs | 61 +++++++++++++++---------- milli/src/update/facet/mod.rs | 4 +- 7 files changed, 74 insertions(+), 41 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5f192b6d1..87298f665 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1794,7 +1794,7 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "heed" version = "0.12.5" -source = "git+https://github.com/meilisearch/heed?tag=v0.12.5#4158a6c484752afaaf9e2530a6ee0e7ab0f24ee8" +source = "git+https://github.com/meilisearch/heed?branch=create-db-no-sub-txn#ba64ce016e939ff1a35cfaa1989dba7057cb2812" dependencies = [ "byteorder", "heed-traits", @@ -1811,12 +1811,12 @@ dependencies = [ [[package]] name = "heed-traits" version = "0.7.0" -source = "git+https://github.com/meilisearch/heed?tag=v0.12.5#4158a6c484752afaaf9e2530a6ee0e7ab0f24ee8" +source = "git+https://github.com/meilisearch/heed?branch=create-db-no-sub-txn#ba64ce016e939ff1a35cfaa1989dba7057cb2812" [[package]] name = "heed-types" version = "0.7.2" -source = "git+https://github.com/meilisearch/heed?tag=v0.12.5#4158a6c484752afaaf9e2530a6ee0e7ab0f24ee8" +source = "git+https://github.com/meilisearch/heed?branch=create-db-no-sub-txn#ba64ce016e939ff1a35cfaa1989dba7057cb2812" dependencies = [ "bincode", "heed-traits", diff --git a/index-scheduler/src/index_mapper/mod.rs b/index-scheduler/src/index_mapper/mod.rs index 2bf6f46ad..7d373c528 100644 --- a/index-scheduler/src/index_mapper/mod.rs +++ b/index-scheduler/src/index_mapper/mod.rs @@ -125,10 +125,15 @@ impl IndexMapper { index_count: usize, indexer_config: IndexerConfig, ) -> Result { + let mut wtxn = env.write_txn()?; + let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?; + let index_stats = env.create_database(&mut wtxn, Some(INDEX_STATS))?; + wtxn.commit()?; + Ok(Self { index_map: Arc::new(RwLock::new(IndexMap::new(index_count))), - index_mapping: env.create_database(Some(INDEX_MAPPING))?, - index_stats: env.create_database(Some(INDEX_STATS))?, + index_mapping, + index_stats, base_path, index_base_map_size, index_growth_amount, diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index af20ba1ae..b3607e85e 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -396,19 +396,30 @@ impl IndexScheduler { .open(options.tasks_path)?; let file_store = FileStore::new(&options.update_file_path)?; + let 
mut wtxn = env.write_txn()?; + let all_tasks = env.create_database(&mut wtxn, Some(db_name::ALL_TASKS))?; + let status = env.create_database(&mut wtxn, Some(db_name::STATUS))?; + let kind = env.create_database(&mut wtxn, Some(db_name::KIND))?; + let index_tasks = env.create_database(&mut wtxn, Some(db_name::INDEX_TASKS))?; + let canceled_by = env.create_database(&mut wtxn, Some(db_name::CANCELED_BY))?; + let enqueued_at = env.create_database(&mut wtxn, Some(db_name::ENQUEUED_AT))?; + let started_at = env.create_database(&mut wtxn, Some(db_name::STARTED_AT))?; + let finished_at = env.create_database(&mut wtxn, Some(db_name::FINISHED_AT))?; + wtxn.commit()?; + // allow unreachable_code to get rids of the warning in the case of a test build. let this = Self { must_stop_processing: MustStopProcessing::default(), processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())), file_store, - all_tasks: env.create_database(Some(db_name::ALL_TASKS))?, - status: env.create_database(Some(db_name::STATUS))?, - kind: env.create_database(Some(db_name::KIND))?, - index_tasks: env.create_database(Some(db_name::INDEX_TASKS))?, - canceled_by: env.create_database(Some(db_name::CANCELED_BY))?, - enqueued_at: env.create_database(Some(db_name::ENQUEUED_AT))?, - started_at: env.create_database(Some(db_name::STARTED_AT))?, - finished_at: env.create_database(Some(db_name::FINISHED_AT))?, + all_tasks, + status, + kind, + index_tasks, + canceled_by, + enqueued_at, + started_at, + finished_at, index_mapper: IndexMapper::new( &env, options.indexes_path, diff --git a/meilisearch-auth/src/store.rs b/meilisearch-auth/src/store.rs index 5c2776154..3e4acc88e 100644 --- a/meilisearch-auth/src/store.rs +++ b/meilisearch-auth/src/store.rs @@ -55,9 +55,11 @@ impl HeedAuthStore { let path = path.as_ref().join(AUTH_DB_PATH); create_dir_all(&path)?; let env = Arc::new(open_auth_store_env(path.as_ref())?); - let keys = env.create_database(Some(KEY_DB_NAME))?; + let mut wtxn = env.write_txn()?; + let keys = env.create_database(&mut wtxn, Some(KEY_DB_NAME))?; let action_keyid_index_expiration = - env.create_database(Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?; + env.create_database(&mut wtxn, Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?; + wtxn.commit()?; Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true }) } diff --git a/milli/Cargo.toml b/milli/Cargo.toml index de0f4e31d..bb5b505d1 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -26,7 +26,7 @@ fst = "0.4.7" fxhash = "0.2.1" geoutils = "0.5.1" grenad = { version = "0.4.4", default-features = false, features = ["tempfile"] } -heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.5", default-features = false, features = ["lmdb", "sync-read-txn"] } +heed = { git = "https://github.com/meilisearch/heed", branch = "create-db-no-sub-txn", default-features = false, features = ["lmdb", "sync-read-txn"] } json-depth-checker = { path = "../json-depth-checker" } levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } memmap2 = "0.5.10" diff --git a/milli/src/index.rs b/milli/src/index.rs index ad53e79ea..bc14b7195 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -167,36 +167,49 @@ impl Index { use db_name::*; options.max_dbs(23); - unsafe { options.flag(Flags::MdbAlwaysFreePages) }; + unsafe { options.flag(Flags::MdbAlwaysFreePages).flag(Flags::MdbWriteMap) }; let env = options.open(path)?; - let main = env.create_poly_database(Some(MAIN))?; - let word_docids = env.create_database(Some(WORD_DOCIDS))?; - let 
exact_word_docids = env.create_database(Some(EXACT_WORD_DOCIDS))?; - let word_prefix_docids = env.create_database(Some(WORD_PREFIX_DOCIDS))?; - let exact_word_prefix_docids = env.create_database(Some(EXACT_WORD_PREFIX_DOCIDS))?; - let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?; - let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?; - let script_language_docids = env.create_database(Some(SCRIPT_LANGUAGE_DOCIDS))?; + let mut wtxn = env.write_txn()?; + let main = env.create_poly_database(&mut wtxn, Some(MAIN))?; + let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?; + let exact_word_docids = env.create_database(&mut wtxn, Some(EXACT_WORD_DOCIDS))?; + let word_prefix_docids = env.create_database(&mut wtxn, Some(WORD_PREFIX_DOCIDS))?; + let exact_word_prefix_docids = + env.create_database(&mut wtxn, Some(EXACT_WORD_PREFIX_DOCIDS))?; + let docid_word_positions = env.create_database(&mut wtxn, Some(DOCID_WORD_POSITIONS))?; + let word_pair_proximity_docids = + env.create_database(&mut wtxn, Some(WORD_PAIR_PROXIMITY_DOCIDS))?; + let script_language_docids = + env.create_database(&mut wtxn, Some(SCRIPT_LANGUAGE_DOCIDS))?; let word_prefix_pair_proximity_docids = - env.create_database(Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?; + env.create_database(&mut wtxn, Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?; let prefix_word_pair_proximity_docids = - env.create_database(Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?; - let word_position_docids = env.create_database(Some(WORD_POSITION_DOCIDS))?; - let word_fid_docids = env.create_database(Some(WORD_FIELD_ID_DOCIDS))?; - let field_id_word_count_docids = env.create_database(Some(FIELD_ID_WORD_COUNT_DOCIDS))?; - let word_prefix_position_docids = env.create_database(Some(WORD_PREFIX_POSITION_DOCIDS))?; - let word_prefix_fid_docids = env.create_database(Some(WORD_PREFIX_FIELD_ID_DOCIDS))?; - let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?; - let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?; - let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?; - let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?; - let facet_id_is_empty_docids = env.create_database(Some(FACET_ID_IS_EMPTY_DOCIDS))?; + env.create_database(&mut wtxn, Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?; + let word_position_docids = env.create_database(&mut wtxn, Some(WORD_POSITION_DOCIDS))?; + let word_fid_docids = env.create_database(&mut wtxn, Some(WORD_FIELD_ID_DOCIDS))?; + let field_id_word_count_docids = + env.create_database(&mut wtxn, Some(FIELD_ID_WORD_COUNT_DOCIDS))?; + let word_prefix_position_docids = + env.create_database(&mut wtxn, Some(WORD_PREFIX_POSITION_DOCIDS))?; + let word_prefix_fid_docids = + env.create_database(&mut wtxn, Some(WORD_PREFIX_FIELD_ID_DOCIDS))?; + let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?; + let facet_id_string_docids = + env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?; + let facet_id_exists_docids = + env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?; + let facet_id_is_null_docids = + env.create_database(&mut wtxn, Some(FACET_ID_IS_NULL_DOCIDS))?; + let facet_id_is_empty_docids = + env.create_database(&mut wtxn, Some(FACET_ID_IS_EMPTY_DOCIDS))?; - let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?; + let field_id_docid_facet_f64s = + env.create_database(&mut wtxn, 
Some(FIELD_ID_DOCID_FACET_F64S))?; let field_id_docid_facet_strings = - env.create_database(Some(FIELD_ID_DOCID_FACET_STRINGS))?; - let documents = env.create_database(Some(DOCUMENTS))?; + env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_STRINGS))?; + let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?; + wtxn.commit()?; Index::set_creation_dates(&env, main, created_at, updated_at)?; diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 39a3ef437..2fd748d4d 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -261,7 +261,9 @@ pub(crate) mod test_helpers { let options = options.map_size(4096 * 4 * 1000 * 100); let tempdir = tempfile::TempDir::new().unwrap(); let env = options.open(tempdir.path()).unwrap(); - let content = env.create_database(None).unwrap(); + let mut wtxn = env.write_txn().unwrap(); + let content = env.create_database(&mut wtxn, None).unwrap(); + wtxn.commit().unwrap(); FacetIndex { content, From 23d1c868259c3a7224e0df00e1469eb76a707851 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 15 May 2023 11:07:23 +0200 Subject: [PATCH 07/73] Re-introduce the sort error message fix --- meilisearch/tests/search/errors.rs | 2 +- milli/src/error.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index a9a2969bb..d402860be 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -946,7 +946,7 @@ async fn sort_unset_ranking_rule() { index.wait_task(1).await; let expected_response = json!({ - "message": "The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.", + "message": "You must specify where \"sort\" is listed in the rankingRules setting to use the sort parameter at search time", "code": "invalid_search_sort", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_sort" diff --git a/milli/src/error.rs b/milli/src/error.rs index 7f0faf2fd..6d3f402e2 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -126,7 +126,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco InvalidSortableAttribute { field: String, valid_fields: BTreeSet }, #[error("{}", HeedError::BadOpenOptions)] InvalidLmdbOpenOptions, - #[error("The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.")] + #[error("You must specify where \"sort\" is listed in the rankingRules setting to use the sort parameter at search time")] SortRankingRuleMissing, #[error("The database file is in an invalid state.")] InvalidStoreFile, From 4d691d071a700f5391364b16b286049973e3c494 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 15 May 2023 11:10:36 +0200 Subject: [PATCH 08/73] Change double-quotes by back-ticks in sort error message --- meilisearch/tests/search/errors.rs | 2 +- milli/src/error.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index d402860be..528043908 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -946,7 +946,7 @@ async fn sort_unset_ranking_rule() { index.wait_task(1).await; let expected_response = json!({ - "message": "You must specify where \"sort\" is listed in the rankingRules setting to use the sort parameter at search time", + "message": "You must specify where `sort` is listed in the 
rankingRules setting to use the sort parameter at search time", "code": "invalid_search_sort", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_sort" diff --git a/milli/src/error.rs b/milli/src/error.rs index 6d3f402e2..fb87ced80 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -126,7 +126,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco InvalidSortableAttribute { field: String, valid_fields: BTreeSet }, #[error("{}", HeedError::BadOpenOptions)] InvalidLmdbOpenOptions, - #[error("You must specify where \"sort\" is listed in the rankingRules setting to use the sort parameter at search time")] + #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time")] SortRankingRuleMissing, #[error("The database file is in an invalid state.")] InvalidStoreFile, From f759ec7fad18ff0e583ba36b2568f3254fb70f46 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 15 May 2023 11:23:58 +0200 Subject: [PATCH 09/73] Expose a flag to enable the MDB_WRITEMAP flag --- index-scheduler/src/index_mapper/index_map.rs | 38 +++++++++++++++---- index-scheduler/src/index_mapper/mod.rs | 12 +++++- index-scheduler/src/lib.rs | 8 ++++ .../src/analytics/segment_analytics.rs | 3 ++ meilisearch/src/lib.rs | 1 + meilisearch/src/main.rs | 5 +++ meilisearch/src/option.rs | 21 ++++++++++ milli/src/index.rs | 2 +- 8 files changed, 80 insertions(+), 10 deletions(-) diff --git a/index-scheduler/src/index_mapper/index_map.rs b/index-scheduler/src/index_mapper/index_map.rs index d140d4944..9bed4fe5d 100644 --- a/index-scheduler/src/index_mapper/index_map.rs +++ b/index-scheduler/src/index_mapper/index_map.rs @@ -5,6 +5,7 @@ use std::collections::BTreeMap; use std::path::Path; use std::time::Duration; +use meilisearch_types::heed::flags::Flags; use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions}; use meilisearch_types::milli::Index; use time::OffsetDateTime; @@ -53,6 +54,7 @@ pub struct IndexMap { pub struct ClosingIndex { uuid: Uuid, closing_event: EnvClosingEvent, + enable_mdb_writemap: bool, map_size: usize, generation: usize, } @@ -68,6 +70,7 @@ impl ClosingIndex { pub fn wait_timeout(self, timeout: Duration) -> Option { self.closing_event.wait_timeout(timeout).then_some(ReopenableIndex { uuid: self.uuid, + enable_mdb_writemap: self.enable_mdb_writemap, map_size: self.map_size, generation: self.generation, }) @@ -76,6 +79,7 @@ impl ClosingIndex { pub struct ReopenableIndex { uuid: Uuid, + enable_mdb_writemap: bool, map_size: usize, generation: usize, } @@ -103,7 +107,7 @@ impl ReopenableIndex { return Ok(()); } map.unavailable.remove(&self.uuid); - map.create(&self.uuid, path, None, self.map_size)?; + map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size)?; } Ok(()) } @@ -170,16 +174,17 @@ impl IndexMap { uuid: &Uuid, path: &Path, date: Option<(OffsetDateTime, OffsetDateTime)>, + enable_mdb_writemap: bool, map_size: usize, ) -> Result { if !matches!(self.get_unavailable(uuid), Missing) { panic!("Attempt to open an index that was unavailable"); } - let index = create_or_open_index(path, date, map_size)?; + let index = create_or_open_index(path, date, enable_mdb_writemap, map_size)?; match self.available.insert(*uuid, index.clone()) { InsertionOutcome::InsertedNew => (), InsertionOutcome::Evicted(evicted_uuid, evicted_index) => { - self.close(evicted_uuid, evicted_index, 0); + self.close(evicted_uuid, evicted_index, enable_mdb_writemap, 0); } 
InsertionOutcome::Replaced(_) => { panic!("Attempt to open an index that was already opened") @@ -212,17 +217,30 @@ impl IndexMap { /// | Closing | Closing | /// | Available | Closing | /// - pub fn close_for_resize(&mut self, uuid: &Uuid, map_size_growth: usize) { + pub fn close_for_resize( + &mut self, + uuid: &Uuid, + enable_mdb_writemap: bool, + map_size_growth: usize, + ) { let Some(index) = self.available.remove(uuid) else { return; }; - self.close(*uuid, index, map_size_growth); + self.close(*uuid, index, enable_mdb_writemap, map_size_growth); } - fn close(&mut self, uuid: Uuid, index: Index, map_size_growth: usize) { + fn close( + &mut self, + uuid: Uuid, + index: Index, + enable_mdb_writemap: bool, + map_size_growth: usize, + ) { let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth; let closing_event = index.prepare_for_closing(); let generation = self.next_generation(); - self.unavailable - .insert(uuid, Some(ClosingIndex { uuid, closing_event, map_size, generation })); + self.unavailable.insert( + uuid, + Some(ClosingIndex { uuid, closing_event, enable_mdb_writemap, map_size, generation }), + ); } /// Attempts to delete and index. @@ -282,11 +300,15 @@ impl IndexMap { fn create_or_open_index( path: &Path, date: Option<(OffsetDateTime, OffsetDateTime)>, + enable_mdb_writemap: bool, map_size: usize, ) -> Result { let mut options = EnvOpenOptions::new(); options.map_size(clamp_to_page_size(map_size)); options.max_readers(1024); + if enable_mdb_writemap { + unsafe { options.flag(Flags::MdbWriteMap) }; + } if let Some((created, updated)) = date { Ok(Index::new_with_creation_dates(options, path, created, updated)?) diff --git a/index-scheduler/src/index_mapper/mod.rs b/index-scheduler/src/index_mapper/mod.rs index 7d373c528..5160ebd63 100644 --- a/index-scheduler/src/index_mapper/mod.rs +++ b/index-scheduler/src/index_mapper/mod.rs @@ -66,6 +66,8 @@ pub struct IndexMapper { index_base_map_size: usize, /// The quantity by which the map size of an index is incremented upon reopening, in bytes. index_growth_amount: usize, + /// Weither we open a meilisearch index with the MDB_WRITEMAP option or not. + enable_mdb_writemap: bool, pub indexer_config: Arc, } @@ -123,6 +125,7 @@ impl IndexMapper { index_base_map_size: usize, index_growth_amount: usize, index_count: usize, + enable_mdb_writemap: bool, indexer_config: IndexerConfig, ) -> Result { let mut wtxn = env.write_txn()?; @@ -137,6 +140,7 @@ impl IndexMapper { base_path, index_base_map_size, index_growth_amount, + enable_mdb_writemap, indexer_config: Arc::new(indexer_config), }) } @@ -167,6 +171,7 @@ impl IndexMapper { &uuid, &index_path, date, + self.enable_mdb_writemap, self.index_base_map_size, )?; @@ -278,7 +283,11 @@ impl IndexMapper { .ok_or_else(|| Error::IndexNotFound(name.to_string()))?; // We remove the index from the in-memory index map. 
- self.index_map.write().unwrap().close_for_resize(&uuid, self.index_growth_amount); + self.index_map.write().unwrap().close_for_resize( + &uuid, + self.enable_mdb_writemap, + self.index_growth_amount, + ); Ok(()) } @@ -343,6 +352,7 @@ impl IndexMapper { &uuid, &index_path, None, + self.enable_mdb_writemap, self.index_base_map_size, )?; } diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index b3607e85e..607a4a407 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -233,6 +233,8 @@ pub struct IndexSchedulerOptions { pub task_db_size: usize, /// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index. pub index_base_map_size: usize, + /// Weither we open a meilisearch index with the MDB_WRITEMAP option or not. + pub enable_mdb_writemap: bool, /// The size, in bytes, by which the map size of an index is increased when it resized due to being full. pub index_growth_amount: usize, /// The number of indexes that can be concurrently opened in memory. @@ -374,6 +376,10 @@ impl IndexScheduler { std::fs::create_dir_all(&options.indexes_path)?; std::fs::create_dir_all(&options.dumps_path)?; + if cfg!(windows) && options.enable_mdb_writemap { + panic!("Windows doesn't support the MDB_WRITEMAP LMDB option"); + } + let task_db_size = clamp_to_page_size(options.task_db_size); let budget = if options.indexer_config.skip_index_budget { IndexBudget { @@ -426,6 +432,7 @@ impl IndexScheduler { budget.map_size, options.index_growth_amount, budget.index_count, + options.enable_mdb_writemap, options.indexer_config, )?, env, @@ -1482,6 +1489,7 @@ mod tests { dumps_path: tempdir.path().join("dumps"), task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. 
+ enable_mdb_writemap: false, index_growth_amount: 1000 * 1000, // 1 MB index_count: 5, indexer_config, diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 3e40c09e8..9b465b8d8 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -225,6 +225,7 @@ impl super::Analytics for SegmentAnalytics { struct Infos { env: String, experimental_enable_metrics: bool, + experimental_reduce_indexing_memory_usage: bool, db_path: bool, import_dump: bool, dump_dir: bool, @@ -258,6 +259,7 @@ impl From for Infos { let Opt { db_path, experimental_enable_metrics, + experimental_reduce_indexing_memory_usage, http_addr, master_key: _, env, @@ -300,6 +302,7 @@ impl From for Infos { Self { env, experimental_enable_metrics, + experimental_reduce_indexing_memory_usage, db_path: db_path != PathBuf::from("./data.ms"), import_dump: import_dump.is_some(), dump_dir: dump_dir != PathBuf::from("dumps/"), diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 67d8bbd5c..bee53f6f8 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -232,6 +232,7 @@ fn open_or_create_database_unchecked( dumps_path: opt.dump_dir.clone(), task_db_size: opt.max_task_db_size.get_bytes() as usize, index_base_map_size: opt.max_index_size.get_bytes() as usize, + enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage, indexer_config: (&opt.indexer_options).try_into()?, autobatching_enabled: true, max_number_of_tasks: 1_000_000, diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index 2ab37488c..1b5e918dc 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -29,6 +29,11 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> { let (opt, config_read_from) = Opt::try_build()?; + anyhow::ensure!( + !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage), + "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows" + ); + setup(&opt)?; match (opt.env.as_ref(), &opt.master_key) { diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 8e6ca9006..83fbeb333 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -48,6 +48,8 @@ const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR"; const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; +const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = + "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE"; const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml"; const DEFAULT_DB_PATH: &str = "./data.ms"; @@ -293,6 +295,20 @@ pub struct Opt { #[serde(default)] pub experimental_enable_metrics: bool, + /// Experimentally reduces the amount of RAM used by the engine when indexing documents. + /// + /// You must not use this flag in production. It is experimental and can corrupt the database + /// or be removed in future versions. It can also be stabilized or directly integrated + /// into the engine later. + /// + /// This flag enables the MDB_WRITEMAP option of LMDB, making the internal key-value store + /// use much less RAM than usual. Unfortunately, it can reduce the write speed of it and therefore + /// slow down the engine. You can read more and tell us about your experience on the dedicated + /// discussion: . 
+ #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)] + #[serde(default)] + pub experimental_reduce_indexing_memory_usage: bool, + #[serde(flatten)] #[clap(flatten)] pub indexer_options: IndexerOpts, @@ -385,6 +401,7 @@ impl Opt { #[cfg(all(not(debug_assertions), feature = "analytics"))] no_analytics, experimental_enable_metrics: enable_metrics_route, + experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage, } = self; export_to_env_if_not_present(MEILI_DB_PATH, db_path); export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); @@ -426,6 +443,10 @@ impl Opt { MEILI_EXPERIMENTAL_ENABLE_METRICS, enable_metrics_route.to_string(), ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE, + reduce_indexing_memory_usage.to_string(), + ); indexer_options.export_to_env(); } diff --git a/milli/src/index.rs b/milli/src/index.rs index bc14b7195..9ea7b628c 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -167,7 +167,7 @@ impl Index { use db_name::*; options.max_dbs(23); - unsafe { options.flag(Flags::MdbAlwaysFreePages).flag(Flags::MdbWriteMap) }; + unsafe { options.flag(Flags::MdbAlwaysFreePages) }; let env = options.open(path)?; let mut wtxn = env.write_txn()?; From 1a79fd0c3c2b2078af797ec310e1569787f09b1d Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 15 May 2023 11:42:30 +0200 Subject: [PATCH 10/73] Use the new heed v0.12.6 --- Cargo.lock | 6 +++--- milli/Cargo.toml | 22 ++++++++++++++++------ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 87298f665..ff4981d11 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1794,7 +1794,7 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "heed" version = "0.12.5" -source = "git+https://github.com/meilisearch/heed?branch=create-db-no-sub-txn#ba64ce016e939ff1a35cfaa1989dba7057cb2812" +source = "git+https://github.com/meilisearch/heed?tag=v0.12.6#8c5b94225fc949c02bb7b900cc50ffaf6b584b1e" dependencies = [ "byteorder", "heed-traits", @@ -1811,12 +1811,12 @@ dependencies = [ [[package]] name = "heed-traits" version = "0.7.0" -source = "git+https://github.com/meilisearch/heed?branch=create-db-no-sub-txn#ba64ce016e939ff1a35cfaa1989dba7057cb2812" +source = "git+https://github.com/meilisearch/heed?tag=v0.12.6#8c5b94225fc949c02bb7b900cc50ffaf6b584b1e" [[package]] name = "heed-types" version = "0.7.2" -source = "git+https://github.com/meilisearch/heed?branch=create-db-no-sub-txn#ba64ce016e939ff1a35cfaa1989dba7057cb2812" +source = "git+https://github.com/meilisearch/heed?tag=v0.12.6#8c5b94225fc949c02bb7b900cc50ffaf6b584b1e" dependencies = [ "bincode", "heed-traits", diff --git a/milli/Cargo.toml b/milli/Cargo.toml index bb5b505d1..be4c88f23 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -25,8 +25,13 @@ flatten-serde-json = { path = "../flatten-serde-json" } fst = "0.4.7" fxhash = "0.2.1" geoutils = "0.5.1" -grenad = { version = "0.4.4", default-features = false, features = ["tempfile"] } -heed = { git = "https://github.com/meilisearch/heed", branch = "create-db-no-sub-txn", default-features = false, features = ["lmdb", "sync-read-txn"] } +grenad = { version = "0.4.4", default-features = false, features = [ + "tempfile", +] } +heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.6", default-features = false, features = [ + "lmdb", + "sync-read-txn", +] } json-depth-checker = { path = "../json-depth-checker" } levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } 
memmap2 = "0.5.10" @@ -39,12 +44,17 @@ rstar = { version = "0.10.0", features = ["serde"] } serde = { version = "1.0.160", features = ["derive"] } serde_json = { version = "1.0.95", features = ["preserve_order"] } slice-group-by = "0.3.0" -smallstr = { version = "0.3.0", features = ["serde"] } +smallstr = { version = "0.3.0", features = ["serde"] } smallvec = "1.10.0" smartstring = "1.0.1" tempfile = "3.5.0" thiserror = "1.0.40" -time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] } +time = { version = "0.3.20", features = [ + "serde-well-known", + "formatting", + "parsing", + "macros", +] } uuid = { version = "1.3.1", features = ["v4"] } filter-parser = { path = "../filter-parser" } @@ -63,13 +73,13 @@ big_s = "1.0.2" insta = "1.29.0" maplit = "1.0.2" md5 = "0.7.0" -rand = {version = "0.8.5", features = ["small_rng"] } +rand = { version = "0.8.5", features = ["small_rng"] } [target.'cfg(fuzzing)'.dev-dependencies] fuzzcheck = "0.12.1" [features] -all-tokenizations = [ "charabia/default" ] +all-tokenizations = ["charabia/default"] # Use POSIX semaphores instead of SysV semaphores in LMDB # For more information on this feature, see heed's Cargo.toml From 13f870e993d22ac245ab95dec3001959746e76b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 15 May 2023 15:08:28 +0200 Subject: [PATCH 11/73] Fix typos and documentation issues --- index-scheduler/src/index_mapper/mod.rs | 2 +- index-scheduler/src/lib.rs | 3 ++- meilisearch/src/option.rs | 11 +---------- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/index-scheduler/src/index_mapper/mod.rs b/index-scheduler/src/index_mapper/mod.rs index 5160ebd63..2903a824f 100644 --- a/index-scheduler/src/index_mapper/mod.rs +++ b/index-scheduler/src/index_mapper/mod.rs @@ -66,7 +66,7 @@ pub struct IndexMapper { index_base_map_size: usize, /// The quantity by which the map size of an index is incremented upon reopening, in bytes. index_growth_amount: usize, - /// Weither we open a meilisearch index with the MDB_WRITEMAP option or not. + /// Whether we open a meilisearch index with the MDB_WRITEMAP option or not. enable_mdb_writemap: bool, pub indexer_config: Arc, } diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 607a4a407..c5eaf2735 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -233,7 +233,7 @@ pub struct IndexSchedulerOptions { pub task_db_size: usize, /// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index. pub index_base_map_size: usize, - /// Weither we open a meilisearch index with the MDB_WRITEMAP option or not. + /// Whether we open a meilisearch index with the MDB_WRITEMAP option or not. pub enable_mdb_writemap: bool, /// The size, in bytes, by which the map size of an index is increased when it resized due to being full. 
pub index_growth_amount: usize, @@ -377,6 +377,7 @@ impl IndexScheduler { std::fs::create_dir_all(&options.dumps_path)?; if cfg!(windows) && options.enable_mdb_writemap { + // programmer error if this happens: in normal use passing the option on Windows is an error in main panic!("Windows doesn't support the MDB_WRITEMAP LMDB option"); } diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 83fbeb333..0511b5033 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -295,16 +295,7 @@ pub struct Opt { #[serde(default)] pub experimental_enable_metrics: bool, - /// Experimentally reduces the amount of RAM used by the engine when indexing documents. - /// - /// You must not use this flag in production. It is experimental and can corrupt the database - /// or be removed in future versions. It can also be stabilized or directly integrated - /// into the engine later. - /// - /// This flag enables the MDB_WRITEMAP option of LMDB, making the internal key-value store - /// use much less RAM than usual. Unfortunately, it can reduce the write speed of it and therefore - /// slow down the engine. You can read more and tell us about your experience on the dedicated - /// discussion: . + /// Experimental RAM reduction during indexing, do not use in production, see: #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)] #[serde(default)] pub experimental_reduce_indexing_memory_usage: bool, From dc7ba77e57a2a1a363e61169f74b7f8ee4f2b832 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 15 May 2023 16:07:43 +0200 Subject: [PATCH 12/73] Add the option in the config file --- config.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/config.toml b/config.toml index 71087f25f..e5b339ffa 100644 --- a/config.toml +++ b/config.toml @@ -126,3 +126,6 @@ ssl_tickets = false experimental_enable_metrics = false # Experimental metrics feature. For more information, see: # Enables the Prometheus metrics on the `GET /metrics` endpoint. + +experimental_reduce_indexing_memory_usage = false +# Experimental RAM reduction during indexing, do not use in production, see: From 85d96d35a857a45ce70f45d5996b5b30fc3ff1c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 3 May 2023 13:39:19 +0200 Subject: [PATCH 13/73] Highlight ngram matches as well --- milli/src/search/new/mod.rs | 4 ++-- milli/src/search/new/query_graph.rs | 14 ++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 7e8426bf9..cbc085b12 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -397,8 +397,8 @@ pub fn execute_search( None }; let bucket_sort_output = if let Some(query_terms) = query_terms { - let graph = QueryGraph::from_query(ctx, &query_terms)?; - located_query_terms = Some(query_terms); + let (graph, new_located_query_terms) = QueryGraph::from_query(ctx, &query_terms)?; + located_query_terms = Some(new_located_query_terms); let ranking_rules = get_ranking_rules_for_query_graph_search( ctx, diff --git a/milli/src/search/new/query_graph.rs b/milli/src/search/new/query_graph.rs index 0e7d5a7f3..dc25d1bc3 100644 --- a/milli/src/search/new/query_graph.rs +++ b/milli/src/search/new/query_graph.rs @@ -88,12 +88,15 @@ pub struct QueryGraph { } impl QueryGraph { - /// Build the query graph from the parsed user search query. + /// Build the query graph from the parsed user search query, return an updated list of the located query terms + /// which contains ngrams. 
pub fn from_query( ctx: &mut SearchContext, // NOTE: the terms here must be consecutive terms: &[LocatedQueryTerm], - ) -> Result { + ) -> Result<(QueryGraph, Vec)> { + let mut new_located_query_terms = terms.to_vec(); + let nbr_typos = number_of_typos_allowed(ctx)?; let mut nodes_data: Vec = vec![QueryNodeData::Start, QueryNodeData::End]; @@ -107,10 +110,11 @@ impl QueryGraph { let original_terms_len = terms.len(); for term_idx in 0..original_terms_len { let mut new_nodes = vec![]; + let new_node_idx = add_node( &mut nodes_data, QueryNodeData::Term(LocatedQueryTermSubset { - term_subset: QueryTermSubset::full(Interned::from_raw(term_idx as u16)), + term_subset: QueryTermSubset::full(terms[term_idx].value), positions: terms[term_idx].positions.clone(), term_ids: term_idx as u8..=term_idx as u8, }), @@ -121,6 +125,7 @@ impl QueryGraph { if let Some(ngram) = query_term::make_ngram(ctx, &terms[term_idx - 1..=term_idx], &nbr_typos)? { + new_located_query_terms.push(ngram.clone()); let ngram_idx = add_node( &mut nodes_data, QueryNodeData::Term(LocatedQueryTermSubset { @@ -136,6 +141,7 @@ impl QueryGraph { if let Some(ngram) = query_term::make_ngram(ctx, &terms[term_idx - 2..=term_idx], &nbr_typos)? { + new_located_query_terms.push(ngram.clone()); let ngram_idx = add_node( &mut nodes_data, QueryNodeData::Term(LocatedQueryTermSubset { @@ -167,7 +173,7 @@ impl QueryGraph { let mut graph = QueryGraph { root_node, end_node, nodes }; graph.build_initial_edges(); - Ok(graph) + Ok((graph, new_located_query_terms)) } /// Remove the given nodes, connecting all their predecessors to all their successors. From a37da36766101d10688e9185e9c822a7e6b77ac7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Mon, 8 May 2023 11:52:43 +0200 Subject: [PATCH 14/73] Implement `words` as a graph-based ranking rule and fix some bugs --- .../search/new/graph_based_ranking_rule.rs | 8 +- milli/src/search/new/logger/visual.rs | 24 ++-- milli/src/search/new/mod.rs | 11 +- .../new/ranking_rule_graph/cheapest_paths.rs | 105 ++++++++++++------ .../src/search/new/ranking_rule_graph/mod.rs | 3 + .../new/ranking_rule_graph/words/mod.rs | 49 ++++++++ milli/src/search/new/words.rs | 87 --------------- 7 files changed, 148 insertions(+), 139 deletions(-) create mode 100644 milli/src/search/new/ranking_rule_graph/words/mod.rs delete mode 100644 milli/src/search/new/words.rs diff --git a/milli/src/search/new/graph_based_ranking_rule.rs b/milli/src/search/new/graph_based_ranking_rule.rs index d8f6836e7..dd25ddd4a 100644 --- a/milli/src/search/new/graph_based_ranking_rule.rs +++ b/milli/src/search/new/graph_based_ranking_rule.rs @@ -46,7 +46,7 @@ use super::logger::SearchLogger; use super::query_graph::QueryNode; use super::ranking_rule_graph::{ ConditionDocIdsCache, DeadEndsCache, ExactnessGraph, FidGraph, PositionGraph, ProximityGraph, - RankingRuleGraph, RankingRuleGraphTrait, TypoGraph, + RankingRuleGraph, RankingRuleGraphTrait, TypoGraph, WordsGraph, }; use super::small_bitmap::SmallBitmap; use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; @@ -54,6 +54,12 @@ use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::ranking_rule_graph::PathVisitor; use crate::{Result, TermsMatchingStrategy}; +pub type Words = GraphBasedRankingRule; +impl GraphBasedRankingRule { + pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self { + Self::new_with_id("words".to_owned(), Some(terms_matching_strategy)) + } +} pub type Proximity = GraphBasedRankingRule; impl 
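// Editor's note: `Words` is now just `GraphBasedRankingRule` specialised to
// the words graph, mirroring the existing `Proximity`, `Typo`, `Fid` and
// `Position` aliases; unlike `Proximity::new`, which takes an optional
// terms-matching strategy, `Words::new` requires one and forwards it as
// `Some(..)` to `new_with_id`.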
GraphBasedRankingRule { pub fn new(terms_matching_strategy: Option) -> Self { diff --git a/milli/src/search/new/logger/visual.rs b/milli/src/search/new/logger/visual.rs index 1cbe007d3..f76782e63 100644 --- a/milli/src/search/new/logger/visual.rs +++ b/milli/src/search/new/logger/visual.rs @@ -4,7 +4,6 @@ use std::io::{BufWriter, Write}; use std::path::{Path, PathBuf}; use std::time::Instant; -// use rand::random; use roaring::RoaringBitmap; use crate::search::new::interner::Interned; @@ -13,6 +12,7 @@ use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::ranking_rule_graph::{ Edge, FidCondition, FidGraph, PositionCondition, PositionGraph, ProximityCondition, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait, TypoCondition, TypoGraph, + WordsCondition, WordsGraph, }; use crate::search::new::ranking_rules::BoxRankingRule; use crate::search::new::{QueryGraph, QueryNode, RankingRule, SearchContext, SearchLogger}; @@ -24,11 +24,12 @@ pub enum SearchEvents { RankingRuleSkipBucket { ranking_rule_idx: usize, bucket_len: u64 }, RankingRuleEndIteration { ranking_rule_idx: usize, universe_len: u64 }, ExtendResults { new: Vec }, - WordsGraph { query_graph: QueryGraph }, ProximityGraph { graph: RankingRuleGraph }, ProximityPaths { paths: Vec>> }, TypoGraph { graph: RankingRuleGraph }, TypoPaths { paths: Vec>> }, + WordsGraph { graph: RankingRuleGraph }, + WordsPaths { paths: Vec>> }, FidGraph { graph: RankingRuleGraph }, FidPaths { paths: Vec>> }, PositionGraph { graph: RankingRuleGraph }, @@ -139,8 +140,11 @@ impl SearchLogger for VisualSearchLogger { let Some(location) = self.location.last() else { return }; match location { Location::Words => { - if let Some(query_graph) = state.downcast_ref::() { - self.events.push(SearchEvents::WordsGraph { query_graph: query_graph.clone() }); + if let Some(graph) = state.downcast_ref::>() { + self.events.push(SearchEvents::WordsGraph { graph: graph.clone() }); + } + if let Some(paths) = state.downcast_ref::>>>() { + self.events.push(SearchEvents::WordsPaths { paths: paths.clone() }); } } Location::Typo => { @@ -329,7 +333,6 @@ impl<'ctx> DetailedLoggerFinish<'ctx> { SearchEvents::ExtendResults { new } => { self.write_extend_results(new)?; } - SearchEvents::WordsGraph { query_graph } => self.write_words_graph(query_graph)?, SearchEvents::ProximityGraph { graph } => self.write_rr_graph(&graph)?, SearchEvents::ProximityPaths { paths } => { self.write_rr_graph_paths::(paths)?; @@ -338,6 +341,10 @@ impl<'ctx> DetailedLoggerFinish<'ctx> { SearchEvents::TypoPaths { paths } => { self.write_rr_graph_paths::(paths)?; } + SearchEvents::WordsGraph { graph } => self.write_rr_graph(&graph)?, + SearchEvents::WordsPaths { paths } => { + self.write_rr_graph_paths::(paths)?; + } SearchEvents::FidGraph { graph } => self.write_rr_graph(&graph)?, SearchEvents::FidPaths { paths } => { self.write_rr_graph_paths::(paths)?; @@ -482,13 +489,6 @@ fill: \"#B6E2D3\" } Ok(()) } - fn write_words_graph(&mut self, qg: QueryGraph) -> Result<()> { - self.make_new_file_for_internal_state_if_needed()?; - - self.write_query_graph(&qg)?; - - Ok(()) - } fn write_rr_graph( &mut self, graph: &RankingRuleGraph, diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index cbc085b12..a28f42f35 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -15,11 +15,7 @@ mod resolve_query_graph; mod small_bitmap; mod exact_attribute; -// TODO: documentation + comments -// implementation is currently an adaptation of the previous 
implementation to fit with the new model mod sort; -// TODO: documentation + comments -mod words; #[cfg(test)] mod tests; @@ -43,10 +39,10 @@ use ranking_rules::{ use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache}; use roaring::RoaringBitmap; use sort::Sort; -use words::Words; use self::geo_sort::GeoSort; pub use self::geo_sort::Strategy as GeoSortStrategy; +use self::graph_based_ranking_rule::Words; use self::interner::Interned; use crate::search::new::distinct::apply_distinct_rule; use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError}; @@ -202,6 +198,11 @@ fn get_ranking_rules_for_query_graph_search<'ctx>( let mut sorted_fields = HashSet::new(); let mut geo_sorted = false; + // Don't add the `words` ranking rule if the term matching strategy is `All` + if matches!(terms_matching_strategy, TermsMatchingStrategy::All) { + words = true; + } + let mut ranking_rules: Vec> = vec![]; let settings_ranking_rules = ctx.index.criteria(ctx.txn)?; for rr in settings_ranking_rules { diff --git a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs index c065cc706..30caf0017 100644 --- a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs +++ b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs @@ -205,18 +205,12 @@ impl VisitorState { impl RankingRuleGraph { pub fn find_all_costs_to_end(&self) -> MappedInterner> { let mut costs_to_end = self.query_graph.nodes.map(|_| vec![]); - let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len()); - let mut node_stack = VecDeque::new(); - - *costs_to_end.get_mut(self.query_graph.end_node) = vec![0]; - - for prev_node in self.query_graph.nodes.get(self.query_graph.end_node).predecessors.iter() { - node_stack.push_back(prev_node); - enqueued.insert(prev_node); - } - - while let Some(cur_node) = node_stack.pop_front() { + self.traverse_breadth_first_backward(self.query_graph.end_node, |cur_node| { + if cur_node == self.query_graph.end_node { + *costs_to_end.get_mut(self.query_graph.end_node) = vec![0]; + return true; + } let mut self_costs = Vec::::new(); let cur_node_edges = &self.edges_of_node.get(cur_node); @@ -232,13 +226,8 @@ impl RankingRuleGraph { self_costs.dedup(); *costs_to_end.get_mut(cur_node) = self_costs; - for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() { - if !enqueued.contains(prev_node) { - node_stack.push_back(prev_node); - enqueued.insert(prev_node); - } - } - } + true + }); costs_to_end } @@ -247,17 +236,9 @@ impl RankingRuleGraph { node_with_removed_outgoing_conditions: Interned, costs: &mut MappedInterner>, ) { - let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len()); - let mut node_stack = VecDeque::new(); - - enqueued.insert(node_with_removed_outgoing_conditions); - node_stack.push_back(node_with_removed_outgoing_conditions); - - 'main_loop: while let Some(cur_node) = node_stack.pop_front() { + self.traverse_breadth_first_backward(node_with_removed_outgoing_conditions, |cur_node| { let mut costs_to_remove = FxHashSet::default(); - for c in costs.get(cur_node) { - costs_to_remove.insert(*c); - } + costs_to_remove.extend(costs.get(cur_node).iter().copied()); let cur_node_edges = &self.edges_of_node.get(cur_node); for edge_idx in cur_node_edges.iter() { @@ -265,23 +246,79 @@ impl RankingRuleGraph { for cost in costs.get(edge.dest_node).iter() { costs_to_remove.remove(&(*cost + edge.cost as u64)); if costs_to_remove.is_empty() { - continue 'main_loop; + return 
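// Editor's note: a worked example of the pruning step above, with assumed
// numbers: if costs(dest) = {0, 3} and the edge into dest costs 2, a node
// whose current costs are {2, 5} has nothing to prune, since 0 + 2 and 3 + 2
// re-derive both entries; `costs_to_remove` empties out and the early return
// leaves that node's cost list untouched.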
false; } } } if costs_to_remove.is_empty() { - continue 'main_loop; + return false; } let mut new_costs = BTreeSet::from_iter(costs.get(cur_node).iter().copied()); for c in costs_to_remove { new_costs.remove(&c); } *costs.get_mut(cur_node) = new_costs.into_iter().collect(); + true + }); + } - for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() { - if !enqueued.contains(prev_node) { - node_stack.push_back(prev_node); - enqueued.insert(prev_node); + /// Traverse the graph backwards from the given node such that every time + /// a node is visited, we are guaranteed that all its successors either: + /// 1. have already been visited; OR + /// 2. were not reachable from the given node + pub fn traverse_breadth_first_backward( + &self, + from: Interned, + mut visit: impl FnMut(Interned) -> bool, + ) { + let mut reachable = SmallBitmap::for_interned_values_in(&self.query_graph.nodes); + { + // go backward to get the set of all reachable nodes from the given node + // the nodes that are not reachable will be set as `visited` + let mut stack = VecDeque::new(); + let mut enqueued = SmallBitmap::for_interned_values_in(&self.query_graph.nodes); + enqueued.insert(from); + stack.push_back(from); + while let Some(n) = stack.pop_front() { + if reachable.contains(n) { + continue; + } + reachable.insert(n); + for prev_node in self.query_graph.nodes.get(n).predecessors.iter() { + if !enqueued.contains(prev_node) && !reachable.contains(prev_node) { + stack.push_back(prev_node); + enqueued.insert(prev_node); + } + } + } + }; + let mut unreachable_or_visited = + SmallBitmap::for_interned_values_in(&self.query_graph.nodes); + for (n, _) in self.query_graph.nodes.iter() { + if !reachable.contains(n) { + unreachable_or_visited.insert(n); + } + } + + let mut enqueued = SmallBitmap::for_interned_values_in(&self.query_graph.nodes); + let mut stack = VecDeque::new(); + + enqueued.insert(from); + stack.push_back(from); + + while let Some(cur_node) = stack.pop_front() { + if !self.query_graph.nodes.get(cur_node).successors.is_subset(&unreachable_or_visited) { + stack.push_back(cur_node); + continue; + } + unreachable_or_visited.insert(cur_node); + if visit(cur_node) { + for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() { + if !enqueued.contains(prev_node) && !unreachable_or_visited.contains(prev_node) + { + stack.push_back(prev_node); + enqueued.insert(prev_node); + } } } } diff --git a/milli/src/search/new/ranking_rule_graph/mod.rs b/milli/src/search/new/ranking_rule_graph/mod.rs index f60c481de..8de455822 100644 --- a/milli/src/search/new/ranking_rule_graph/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/mod.rs @@ -20,6 +20,8 @@ mod position; mod proximity; /// Implementation of the `typo` ranking rule mod typo; +/// Implementation of the `words` ranking rule +mod words; use std::collections::BTreeSet; use std::hash::Hash; @@ -33,6 +35,7 @@ pub use position::{PositionCondition, PositionGraph}; pub use proximity::{ProximityCondition, ProximityGraph}; use roaring::RoaringBitmap; pub use typo::{TypoCondition, TypoGraph}; +pub use words::{WordsCondition, WordsGraph}; use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner}; use super::query_term::LocatedQueryTermSubset; diff --git a/milli/src/search/new/ranking_rule_graph/words/mod.rs b/milli/src/search/new/ranking_rule_graph/words/mod.rs new file mode 100644 index 000000000..0a0cc112b --- /dev/null +++ b/milli/src/search/new/ranking_rule_graph/words/mod.rs @@ -0,0 +1,49 @@ +use 
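// Editor's note on the new `words` graph module below: resolving a condition
// amounts to "documents containing the term subset, restricted to the
// universe", i.e. the two lines
//     let mut docids = compute_query_term_subset_docids(ctx, &term.term_subset)?;
//     docids &= universe;
// and, as the in-code comment suggests, passing the universe down into the
// lookup could avoid materialising the full posting list first.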
roaring::RoaringBitmap; + +use super::{ComputedCondition, RankingRuleGraphTrait}; +use crate::search::new::interner::{DedupInterner, Interned}; +use crate::search::new::query_term::LocatedQueryTermSubset; +use crate::search::new::resolve_query_graph::compute_query_term_subset_docids; +use crate::search::new::SearchContext; +use crate::Result; + +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct WordsCondition { + term: LocatedQueryTermSubset, +} + +pub enum WordsGraph {} + +impl RankingRuleGraphTrait for WordsGraph { + type Condition = WordsCondition; + + fn resolve_condition( + ctx: &mut SearchContext, + condition: &Self::Condition, + universe: &RoaringBitmap, + ) -> Result { + let WordsCondition { term, .. } = condition; + // maybe compute_query_term_subset_docids should accept a universe as argument + let mut docids = compute_query_term_subset_docids(ctx, &term.term_subset)?; + docids &= universe; + + Ok(ComputedCondition { + docids, + universe_len: universe.len(), + start_term_subset: None, + end_term_subset: term.clone(), + }) + } + + fn build_edges( + _ctx: &mut SearchContext, + conditions_interner: &mut DedupInterner, + _from: Option<&LocatedQueryTermSubset>, + to_term: &LocatedQueryTermSubset, + ) -> Result)>> { + Ok(vec![( + to_term.term_ids.len() as u32, + conditions_interner.insert(WordsCondition { term: to_term.clone() }), + )]) + } +} diff --git a/milli/src/search/new/words.rs b/milli/src/search/new/words.rs deleted file mode 100644 index 72b7b5916..000000000 --- a/milli/src/search/new/words.rs +++ /dev/null @@ -1,87 +0,0 @@ -use roaring::RoaringBitmap; - -use super::logger::SearchLogger; -use super::query_graph::QueryNode; -use super::resolve_query_graph::compute_query_graph_docids; -use super::small_bitmap::SmallBitmap; -use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; -use crate::{Result, TermsMatchingStrategy}; - -pub struct Words { - exhausted: bool, // TODO: remove - query_graph: Option, - nodes_to_remove: Vec>, - terms_matching_strategy: TermsMatchingStrategy, -} -impl Words { - pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self { - Self { - exhausted: true, - query_graph: None, - nodes_to_remove: vec![], - terms_matching_strategy, - } - } -} - -impl<'ctx> RankingRule<'ctx, QueryGraph> for Words { - fn id(&self) -> String { - "words".to_owned() - } - fn start_iteration( - &mut self, - ctx: &mut SearchContext<'ctx>, - _logger: &mut dyn SearchLogger, - _universe: &RoaringBitmap, - parent_query_graph: &QueryGraph, - ) -> Result<()> { - self.exhausted = false; - self.query_graph = Some(parent_query_graph.clone()); - self.nodes_to_remove = match self.terms_matching_strategy { - TermsMatchingStrategy::Last => { - let mut ns = parent_query_graph.removal_order_for_terms_matching_strategy_last(ctx); - ns.reverse(); - ns - } - TermsMatchingStrategy::All => { - vec![] - } - }; - Ok(()) - } - - fn next_bucket( - &mut self, - ctx: &mut SearchContext<'ctx>, - logger: &mut dyn SearchLogger, - universe: &RoaringBitmap, - ) -> Result>> { - if self.exhausted { - return Ok(None); - } - let Some(query_graph) = &mut self.query_graph else { panic!() }; - logger.log_internal_state(query_graph); - - let this_bucket = compute_query_graph_docids(ctx, query_graph, universe)?; - - let child_query_graph = query_graph.clone(); - - if self.nodes_to_remove.is_empty() { - self.exhausted = true; - } else { - let nodes_to_remove = self.nodes_to_remove.pop().unwrap(); - query_graph.remove_nodes_keep_edges(&nodes_to_remove.iter().collect::>()); - } - 
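// Editor's note: the deletion below removes the old bucket-based `words`
// rule; its job (yield documents matching progressively fewer query terms
// under TermsMatchingStrategy::Last) is now expressed by `WordsGraph` through
// edge costs, so the cloned-graph/`nodes_to_remove` iteration is no longer
// needed.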
Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket })) - } - - fn end_iteration( - &mut self, - _ctx: &mut SearchContext<'ctx>, - _logger: &mut dyn SearchLogger, - ) { - self.exhausted = true; - self.nodes_to_remove = vec![]; - self.query_graph = None; - } -} From 42650f82e8a92d1c6615cb434946d3fba9a2abce Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 16 May 2023 10:57:26 +0200 Subject: [PATCH 15/73] Re-add final dot --- meilisearch/tests/search/errors.rs | 2 +- milli/src/error.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index 528043908..f314e8800 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -946,7 +946,7 @@ async fn sort_unset_ranking_rule() { index.wait_task(1).await; let expected_response = json!({ - "message": "You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time", + "message": "You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.", "code": "invalid_search_sort", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_sort" diff --git a/milli/src/error.rs b/milli/src/error.rs index fb87ced80..8d55eabbd 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -126,7 +126,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco InvalidSortableAttribute { field: String, valid_fields: BTreeSet }, #[error("{}", HeedError::BadOpenOptions)] InvalidLmdbOpenOptions, - #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time")] + #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")] SortRankingRuleMissing, #[error("The database file is in an invalid state.")] InvalidStoreFile, From f6524a68582f2a96b130263261e18ca4d0146948 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Mon, 8 May 2023 13:06:35 +0200 Subject: [PATCH 16/73] Adjust costs of edges in position ranking rule To ensure good performance --- .../new/ranking_rule_graph/position/mod.rs | 29 +++++++------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/milli/src/search/new/ranking_rule_graph/position/mod.rs b/milli/src/search/new/ranking_rule_graph/position/mod.rs index d4640097e..9b0b6478f 100644 --- a/milli/src/search/new/ranking_rule_graph/position/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/position/mod.rs @@ -111,23 +111,16 @@ impl RankingRuleGraphTrait for PositionGraph { fn cost_from_position(sum_positions: u32) -> u32 { match sum_positions { - 0 | 1 | 2 | 3 => sum_positions, - 4 | 5 => 4, - 6 | 7 => 5, - 8 | 9 => 6, - 10 | 11 => 7, - 12 | 13 => 8, - 14 | 15 => 9, - 16 | 17..=24 => 10, - 25..=32 => 11, - 33..=64 => 12, - 65..=128 => 13, - 129..=256 => 14, - 257..=512 => 15, - 513..=1024 => 16, - 1025..=2048 => 17, - 2049..=4096 => 18, - 4097..=8192 => 19, - _ => 20, + 0 => 0, + 1 => 1, + 2..=4 => 2, + 5..=7 => 3, + 8..=11 => 4, + 12..=16 => 5, + 17..=24 => 6, + 25..=64 => 7, + 65..=256 => 8, + 257..=1024 => 9, + _ => 10, } } From 0b38f211ac9290cc9e9dc0315ff631dff5eced89 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 16 May 2023 12:07:44 +0200 Subject: [PATCH 17/73] test the new introduced route --- meilisearch/tests/auth/authorization.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git 
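// Editor's note on the position-cost patch above, spot-checking a few values:
// cost_from_position(3) == 2, cost_from_position(20) == 6, and any sum past
// 1024 saturates at 10. Collapsing position sums into coarse buckets keeps
// the number of distinct path costs small, which is where the stated
// performance win comes from.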
a/meilisearch/tests/auth/authorization.rs b/meilisearch/tests/auth/authorization.rs index ef4a7eaa1..58fba4481 100644 --- a/meilisearch/tests/auth/authorization.rs +++ b/meilisearch/tests/auth/authorization.rs @@ -16,8 +16,11 @@ pub static AUTHORIZATIONS: Lazy hashset!{"search", "*"}, ("POST", "/indexes/products/documents") => hashset!{"documents.add", "documents.*", "*"}, ("GET", "/indexes/products/documents") => hashset!{"documents.get", "documents.*", "*"}, + ("POST", "/indexes/products/documents/fetch") => hashset!{"documents.get", "documents.*", "*"}, ("GET", "/indexes/products/documents/0") => hashset!{"documents.get", "documents.*", "*"}, ("DELETE", "/indexes/products/documents/0") => hashset!{"documents.delete", "documents.*", "*"}, + ("POST", "/indexes/products/documents/delete-batch") => hashset!{"documents.delete", "documents.*", "*"}, + ("POST", "/indexes/products/documents/delete") => hashset!{"documents.delete", "documents.*", "*"}, ("GET", "/tasks") => hashset!{"tasks.get", "tasks.*", "*"}, ("DELETE", "/tasks") => hashset!{"tasks.delete", "tasks.*", "*"}, ("GET", "/tasks?indexUid=products") => hashset!{"tasks.get", "tasks.*", "*"}, From 3e19702de607ab41861970f025c06bd5ed7b9603 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 16 May 2023 12:22:46 +0200 Subject: [PATCH 18/73] Update snapshot tests --- milli/src/search/new/tests/attribute_position.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/milli/src/search/new/tests/attribute_position.rs b/milli/src/search/new/tests/attribute_position.rs index 5e16cd023..37f303b10 100644 --- a/milli/src/search/new/tests/attribute_position.rs +++ b/milli/src/search/new/tests/attribute_position.rs @@ -138,7 +138,7 @@ fn test_attribute_position_simple() { s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("quick brown"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 3, 4, 2, 1, 0, 6, 8, 7, 9, 5]"); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]"); } #[test] fn test_attribute_position_repeated() { @@ -163,7 +163,7 @@ fn test_attribute_position_different_fields() { s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("quick brown"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 3, 4, 2, 1, 0, 6, 8, 7, 9, 5]"); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]"); } #[test] @@ -176,5 +176,5 @@ fn test_attribute_position_ngrams() { s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("quick brown"); let SearchResult { documents_ids, .. 
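// Editor's note: the snapshot updates below follow from the new cost table:
// documents 2, 3 and 4 now land in the same position bucket, and within a
// bucket candidates appear to come back in ascending docid order, hence
// [.., 2, 3, 4, ..] replacing [.., 3, 4, 2, ..]; the tie-break order is an
// inference from bitmap iteration, not something the patch states.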
} = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 3, 4, 2, 1, 0, 6, 8, 7, 9, 5]"); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]"); } From 96da5130a4074fab1150f35dfb7c0c5f605f5515 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 16 May 2023 13:56:18 +0200 Subject: [PATCH 19/73] fix the error code in case of not filterable attributes on the get / delete documents by filter routes --- index-scheduler/src/batch.rs | 8 ++++- index-scheduler/src/error.rs | 8 +++++ meilisearch/src/routes/indexes/documents.rs | 7 ++++- meilisearch/tests/documents/errors.rs | 35 +++++++++++++++++---- 4 files changed, 50 insertions(+), 8 deletions(-) diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index c88234809..67f70d367 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -24,6 +24,7 @@ use std::io::BufWriter; use dump::IndexMetadata; use log::{debug, error, info}; +use meilisearch_types::error::Code; use meilisearch_types::heed::{RoTxn, RwTxn}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; use meilisearch_types::milli::heed::CompactionOption; @@ -1491,7 +1492,12 @@ fn delete_document_by_filter(filter: &serde_json::Value, index: Index) -> Result Ok(if let Some(filter) = filter { let mut wtxn = index.write_txn()?; - let candidates = filter.evaluate(&wtxn, &index)?; + let candidates = filter.evaluate(&wtxn, &index).map_err(|err| match err { + milli::Error::UserError(milli::UserError::InvalidFilter(_)) => { + Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter) + } + e => e.into(), + })?; let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?; delete_operation.delete_documents(&candidates); let deleted_documents = diff --git a/index-scheduler/src/error.rs b/index-scheduler/src/error.rs index 3a19ed4d2..acab850d1 100644 --- a/index-scheduler/src/error.rs +++ b/index-scheduler/src/error.rs @@ -46,6 +46,8 @@ impl From for Code { #[allow(clippy::large_enum_variant)] #[derive(Error, Debug)] pub enum Error { + #[error("{1}")] + WithCustomErrorCode(Code, Box), #[error("Index `{0}` not found.")] IndexNotFound(String), #[error("Index `{0}` already exists.")] @@ -144,6 +146,7 @@ impl Error { pub fn is_recoverable(&self) -> bool { match self { Error::IndexNotFound(_) + | Error::WithCustomErrorCode(_, _) | Error::IndexAlreadyExists(_) | Error::SwapDuplicateIndexFound(_) | Error::SwapDuplicateIndexesFound(_) @@ -176,11 +179,16 @@ impl Error { Error::PlannedFailure => false, } } + + pub fn with_custom_error_code(self, code: Code) -> Self { + Self::WithCustomErrorCode(code, Box::new(self)) + } } impl ErrorCode for Error { fn error_code(&self) -> Code { match self { + Error::WithCustomErrorCode(code, _) => *code, Error::IndexNotFound(_) => Code::IndexNotFound, Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists, Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound, diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index eb0f5a59e..096f5737f 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -540,7 +540,12 @@ fn retrieve_documents>( }; let candidates = if let Some(filter) = filter { - filter.evaluate(&rtxn, index)? 
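// Editor's note: the `map_err` below narrows only
// milli::UserError::InvalidFilter; every other error keeps its original code.
// That single remapping is what turns the former `invalid_search_filter`
// responses on the document routes into `invalid_document_filter`, as the
// test snapshots later in this patch show.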
+ filter.evaluate(&rtxn, index).map_err(|err| match err { + milli::Error::UserError(milli::UserError::InvalidFilter(_)) => { + ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter) + } + e => e.into(), + })? } else { index.documents_ids(&rtxn)? }; diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index b72dc40f3..0210d1bb2 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -180,9 +180,9 @@ async fn get_all_documents_bad_filter() { snapshot!(json_string!(response), @r###" { "message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo=bernese", - "code": "invalid_search_filter", + "code": "invalid_document_filter", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + "link": "https://docs.meilisearch.com/errors#invalid_document_filter" } "###); } @@ -630,9 +630,9 @@ async fn delete_document_by_filter() { }, "error": { "message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese", - "code": "invalid_search_filter", + "code": "invalid_document_filter", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + "link": "https://docs.meilisearch.com/errors#invalid_document_filter" }, "duration": "[duration]", "enqueuedAt": "[date]", @@ -664,9 +664,9 @@ async fn delete_document_by_filter() { }, "error": { "message": "Attribute `catto` is not filterable. Available filterable attributes are: `doggo`.\n1:6 catto = jorts", - "code": "invalid_search_filter", + "code": "invalid_document_filter", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + "link": "https://docs.meilisearch.com/errors#invalid_document_filter" }, "duration": "[duration]", "enqueuedAt": "[date]", @@ -748,4 +748,27 @@ async fn fetch_document_by_filter() { "link": "https://docs.meilisearch.com/errors#invalid_document_filter" } "###); + + let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", + "code": "invalid_document_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_filter" + } + "###); + + let (response, code) = + index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Attribute `doggo` is not filterable. 
Available filterable attributes are: `color`.\n1:6 doggo = bernese", + "code": "invalid_document_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_filter" + } + "###); } From 57582688660f7bb0c40865a558933fa4f9a73a27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 16 May 2023 16:22:23 +0200 Subject: [PATCH 20/73] Don't compute split_words for phrases --- milli/src/search/new/logger/visual.rs | 2 +- .../new/query_term/compute_derivations.rs | 37 ++++++++++++++----- milli/src/search/new/query_term/mod.rs | 17 +++++++-- .../src/search/new/query_term/parse_query.rs | 4 +- .../search/new/ranking_rule_graph/typo/mod.rs | 2 +- 5 files changed, 45 insertions(+), 17 deletions(-) diff --git a/milli/src/search/new/logger/visual.rs b/milli/src/search/new/logger/visual.rs index f76782e63..8df56da89 100644 --- a/milli/src/search/new/logger/visual.rs +++ b/milli/src/search/new/logger/visual.rs @@ -462,7 +462,7 @@ fill: \"#B6E2D3\" shape: class max_nbr_typo: {}", term_subset.description(ctx), - term_subset.max_nbr_typos(ctx) + term_subset.max_typo_cost(ctx) )?; for w in term_subset.all_single_words_except_prefix_db(ctx)? { diff --git a/milli/src/search/new/query_term/compute_derivations.rs b/milli/src/search/new/query_term/compute_derivations.rs index c26c4bc6b..d5dfbbcd0 100644 --- a/milli/src/search/new/query_term/compute_derivations.rs +++ b/milli/src/search/new/query_term/compute_derivations.rs @@ -28,14 +28,14 @@ pub enum ZeroOrOneTypo { impl Interned { pub fn compute_fully_if_needed(self, ctx: &mut SearchContext) -> Result<()> { let s = ctx.term_interner.get_mut(self); - if s.max_nbr_typos <= 1 && s.one_typo.is_uninit() { + if s.max_levenshtein_distance <= 1 && s.one_typo.is_uninit() { assert!(s.two_typo.is_uninit()); // Initialize one_typo subterm even if max_nbr_typo is 0 because of split words self.initialize_one_typo_subterm(ctx)?; let s = ctx.term_interner.get_mut(self); assert!(s.one_typo.is_init()); s.two_typo = Lazy::Init(TwoTypoTerm::default()); - } else if s.max_nbr_typos > 1 && s.two_typo.is_uninit() { + } else if s.max_levenshtein_distance > 1 && s.two_typo.is_uninit() { assert!(s.two_typo.is_uninit()); self.initialize_one_and_two_typo_subterm(ctx)?; let s = ctx.term_interner.get_mut(self); @@ -185,7 +185,7 @@ pub fn partially_initialized_term_from_word( original: ctx.word_interner.insert(word.to_owned()), ngram_words: None, is_prefix: false, - max_nbr_typos: 0, + max_levenshtein_distance: 0, zero_typo: <_>::default(), one_typo: Lazy::Init(<_>::default()), two_typo: Lazy::Init(<_>::default()), @@ -256,7 +256,7 @@ pub fn partially_initialized_term_from_word( Ok(QueryTerm { original: word_interned, ngram_words: None, - max_nbr_typos: max_typo, + max_levenshtein_distance: max_typo, is_prefix, zero_typo, one_typo: Lazy::Uninit, @@ -275,7 +275,16 @@ fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result { fn initialize_one_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> { let self_mut = ctx.term_interner.get_mut(self); - let QueryTerm { original, is_prefix, one_typo, max_nbr_typos, .. } = self_mut; + + let allows_split_words = self_mut.allows_split_words(); + let QueryTerm { + original, + is_prefix, + one_typo, + max_levenshtein_distance: max_nbr_typos, + .. 
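// Editor's note: the rename from `max_nbr_typos` to `max_levenshtein_distance`
// matters because the two notions now diverge: the field measures edit
// distance only, while `max_typo_cost` (in query_term/mod.rs below) also
// charges 1 for a split-word derivation on an otherwise typo-less term, and
// phrases, for which `allows_split_words()` is false, get no split-word
// derivations at all.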
+ } = self_mut; + let original = *original; let is_prefix = *is_prefix; // let original_str = ctx.word_interner.get(*original).to_owned(); @@ -300,13 +309,17 @@ impl Interned { })?; } - let original_str = ctx.word_interner.get(original).to_owned(); - let split_words = find_split_words(ctx, original_str.as_str())?; + let split_words = if allows_split_words { + let original_str = ctx.word_interner.get(original).to_owned(); + find_split_words(ctx, original_str.as_str())? + } else { + None + }; let self_mut = ctx.term_interner.get_mut(self); // Only add the split words to the derivations if: - // 1. the term is not an ngram; OR + // 1. the term is neither an ngram nor a phrase; OR // 2. the term is an ngram, but the split words are different from the ngram's component words let split_words = if let Some((ngram_words, split_words)) = self_mut.ngram_words.as_ref().zip(split_words.as_ref()) @@ -328,7 +341,13 @@ impl Interned { } fn initialize_one_and_two_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> { let self_mut = ctx.term_interner.get_mut(self); - let QueryTerm { original, is_prefix, two_typo, max_nbr_typos, .. } = self_mut; + let QueryTerm { + original, + is_prefix, + two_typo, + max_levenshtein_distance: max_nbr_typos, + .. + } = self_mut; let original_str = ctx.word_interner.get(*original).to_owned(); if two_typo.is_init() { return Ok(()); diff --git a/milli/src/search/new/query_term/mod.rs b/milli/src/search/new/query_term/mod.rs index bf521d9b2..fb749a797 100644 --- a/milli/src/search/new/query_term/mod.rs +++ b/milli/src/search/new/query_term/mod.rs @@ -43,7 +43,7 @@ pub struct QueryTermSubset { pub struct QueryTerm { original: Interned, ngram_words: Option>>, - max_nbr_typos: u8, + max_levenshtein_distance: u8, is_prefix: bool, zero_typo: ZeroTypoTerm, // May not be computed yet @@ -342,10 +342,16 @@ impl QueryTermSubset { } None } - pub fn max_nbr_typos(&self, ctx: &SearchContext) -> u8 { + pub fn max_typo_cost(&self, ctx: &SearchContext) -> u8 { let t = ctx.term_interner.get(self.original); - match t.max_nbr_typos { - 0 => 0, + match t.max_levenshtein_distance { + 0 => { + if t.allows_split_words() { + 1 + } else { + 0 + } + } 1 => { if self.one_typo_subset.is_empty() { 0 @@ -438,6 +444,9 @@ impl QueryTerm { self.zero_typo.is_empty() && one_typo.is_empty() && two_typo.is_empty() } + fn allows_split_words(&self) -> bool { + self.zero_typo.phrase.is_none() + } } impl Interned { diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs index dc317a0fb..bf90748e4 100644 --- a/milli/src/search/new/query_term/parse_query.rs +++ b/milli/src/search/new/query_term/parse_query.rs @@ -217,7 +217,7 @@ pub fn make_ngram( original: ngram_str_interned, ngram_words: Some(words_interned), is_prefix, - max_nbr_typos, + max_levenshtein_distance: max_nbr_typos, zero_typo: term.zero_typo, one_typo: Lazy::Uninit, two_typo: Lazy::Uninit, @@ -271,7 +271,7 @@ impl PhraseBuilder { QueryTerm { original: ctx.word_interner.insert(phrase_desc), ngram_words: None, - max_nbr_typos: 0, + max_levenshtein_distance: 0, is_prefix: false, zero_typo: ZeroTypoTerm { phrase: Some(phrase), diff --git a/milli/src/search/new/ranking_rule_graph/typo/mod.rs b/milli/src/search/new/ranking_rule_graph/typo/mod.rs index da5198c23..a44be6015 100644 --- a/milli/src/search/new/ranking_rule_graph/typo/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/typo/mod.rs @@ -50,7 +50,7 @@ impl RankingRuleGraphTrait for TypoGraph { // 3-gram -> equivalent to 2 typos let base_cost = 
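// Editor's note: the edge-building loop below now ranges over `max_typo_cost`
// rather than the raw typo count, so a split-word match is priced like a
// single typo when the typo graph is built, consistent with the change in
// query_term/mod.rs above.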
if term.term_ids.len() == 1 { 0 } else { term.term_ids.len() as u32 }; - for nbr_typos in 0..=term.term_subset.max_nbr_typos(ctx) { + for nbr_typos in 0..=term.term_subset.max_typo_cost(ctx) { let mut term = term.clone(); match nbr_typos { 0 => { From ec8f685d8404673e4961daeec364a993c46124e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 16 May 2023 17:01:30 +0200 Subject: [PATCH 21/73] Fix bug in cheapest path algorithm --- .../new/ranking_rule_graph/cheapest_paths.rs | 22 ++++++++----------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs index 30caf0017..4a696b3dd 100644 --- a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs +++ b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs @@ -209,7 +209,7 @@ impl RankingRuleGraph { self.traverse_breadth_first_backward(self.query_graph.end_node, |cur_node| { if cur_node == self.query_graph.end_node { *costs_to_end.get_mut(self.query_graph.end_node) = vec![0]; - return true; + return; } let mut self_costs = Vec::::new(); @@ -226,7 +226,6 @@ impl RankingRuleGraph { self_costs.dedup(); *costs_to_end.get_mut(cur_node) = self_costs; - true }); costs_to_end } @@ -246,19 +245,18 @@ impl RankingRuleGraph { for cost in costs.get(edge.dest_node).iter() { costs_to_remove.remove(&(*cost + edge.cost as u64)); if costs_to_remove.is_empty() { - return false; + return; } } } if costs_to_remove.is_empty() { - return false; + return; } let mut new_costs = BTreeSet::from_iter(costs.get(cur_node).iter().copied()); for c in costs_to_remove { new_costs.remove(&c); } *costs.get_mut(cur_node) = new_costs.into_iter().collect(); - true }); } @@ -269,7 +267,7 @@ impl RankingRuleGraph { pub fn traverse_breadth_first_backward( &self, from: Interned, - mut visit: impl FnMut(Interned) -> bool, + mut visit: impl FnMut(Interned), ) { let mut reachable = SmallBitmap::for_interned_values_in(&self.query_graph.nodes); { @@ -312,13 +310,11 @@ impl RankingRuleGraph { continue; } unreachable_or_visited.insert(cur_node); - if visit(cur_node) { - for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() { - if !enqueued.contains(prev_node) && !unreachable_or_visited.contains(prev_node) - { - stack.push_back(prev_node); - enqueued.insert(prev_node); - } + visit(cur_node); + for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() { + if !enqueued.contains(prev_node) && !unreachable_or_visited.contains(prev_node) { + stack.push_back(prev_node); + enqueued.insert(prev_node); } } } From d7ddf4925e7ca5b4a444a7f14fe5f3035242f522 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 17 May 2023 14:25:50 +0200 Subject: [PATCH 22/73] Revert "Disable autobatching of additions and deletions" This reverts commit a94e78ffb051193ece752a9dd19858a05922f706. --- index-scheduler/src/autobatcher.rs | 103 ++++++++++++++++++++--------- index-scheduler/src/lib.rs | 99 +++++++++++++++++++++++++++ 2 files changed, 169 insertions(+), 33 deletions(-) diff --git a/index-scheduler/src/autobatcher.rs b/index-scheduler/src/autobatcher.rs index d738cc5e4..096bcce14 100644 --- a/index-scheduler/src/autobatcher.rs +++ b/index-scheduler/src/autobatcher.rs @@ -321,9 +321,18 @@ impl BatchKind { }) } ( - this @ BatchKind::DocumentOperation { .. 
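// Editor's note: the traversal fix above makes `visit` return nothing and
// unconditionally enqueues predecessors after each visit; previously a
// `false` return from the cost-pruning closure stopped propagation, so nodes
// reachable only through an unchanged node could miss their cost updates,
// which appears to be the bug the subject line refers to.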
}, + BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids }, K::DocumentDeletion, - ) => Break(this), + ) => { + operation_ids.push(id); + + Continue(BatchKind::DocumentOperation { + method, + allow_index_creation, + primary_key, + operation_ids, + }) + } // but we can't autobatch documents if it's not the same kind // this match branch MUST be AFTER the previous one ( @@ -346,7 +355,35 @@ impl BatchKind { deletion_ids.push(id); Continue(BatchKind::DocumentClear { ids: deletion_ids }) } - // we can't autobatch a deletion and an import + // we can autobatch the deletion and import if the index already exists + ( + BatchKind::DocumentDeletion { mut deletion_ids }, + K::DocumentImport { method, allow_index_creation, primary_key } + ) if index_already_exists => { + deletion_ids.push(id); + + Continue(BatchKind::DocumentOperation { + method, + allow_index_creation, + primary_key, + operation_ids: deletion_ids, + }) + } + // we can autobatch the deletion and import if both can't create an index + ( + BatchKind::DocumentDeletion { mut deletion_ids }, + K::DocumentImport { method, allow_index_creation, primary_key } + ) if !allow_index_creation => { + deletion_ids.push(id); + + Continue(BatchKind::DocumentOperation { + method, + allow_index_creation, + primary_key, + operation_ids: deletion_ids, + }) + } + // we can't autobatch a deletion and an import if the index does not exists but would be created by an addition ( this @ BatchKind::DocumentDeletion { .. }, K::DocumentImport { .. } @@ -648,36 +685,36 @@ mod tests { debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))"); debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))"); - // We can't autobatch document addition with document deletion - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: 
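// Editor's note: taken together, the restored arms above autobatch a deletion
// followed by an import only when the index already exists or when the import
// is not allowed to create one; a deletion followed by an index-creating
// import on a missing index still breaks the batch, so the deletion runs on
// its own.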
ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); - // we also can't do the only way around - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), 
doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + // We can autobatch document addition with document deletion + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, None, 
[doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + // And the other way around + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), 
doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); } #[test] diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index af20ba1ae..f245bb186 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -2017,6 +2017,105 @@ mod tests { snapshot!(snapshot_index_scheduler(&index_scheduler), name: "both_task_succeeded"); } + #[test] + fn document_addition_and_document_deletion() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let content = r#"[ + { "id": 1, "doggo": "jean bob" }, + { "id": 2, "catto": "jorts" }, + { "id": 3, "doggo": "bork" } + ]"#; + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + index_scheduler + .register(KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1"), S("2")], + }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + handle.advance_one_successful_batch(); // The addition AND deletion should've been batched together + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_batch"); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); + } + + #[test] + fn document_deletion_and_document_addition() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + index_scheduler + .register(KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1"), S("2")], + }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + let content = r#"[ + { "id": 1, "doggo": "jean bob" }, + { "id": 2, "catto": "jorts" }, + { "id": 3, "doggo": "bork" } + ]"#; + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, 
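// Editor's note: the addition-first test above expects both tasks in a single
// batch (one advance_one_successful_batch call), and its "documents" snapshot
// keeps only id 3, since documents 1 and 2 are added and then deleted within
// the same batch.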
+ content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + // The deletion should have failed because it can't create an index + handle.advance_one_failed_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_the_deletion"); + + // The addition should work + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_last_successful_addition"); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::<Vec<_>>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::<Vec<_>>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); + } + #[test] fn do_not_batch_task_of_different_indexes() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); From 4391cba6ca10a60cce5be068f61634ee02e7a3aa Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 17 May 2023 18:19:43 +0200 Subject: [PATCH 23/73] fix the addition + deletion bug --- Cargo.lock | 22 +++ .../after_processing_the_batch.snap | 43 +++++ .../documents.snap | 9 + .../registered_the_first_task.snap | 37 ++++ .../registered_the_second_task.snap | 40 ++++ .../after_failing_the_deletion.snap | 43 +++++ .../after_last_successful_addition.snap | 46 +++++ .../documents.snap | 17 ++ .../registered_the_first_task.snap | 36 ++++ .../registered_the_second_task.snap | 40 ++++ milli/Cargo.toml | 4 +- milli/src/documents/mod.rs | 2 - milli/src/update/index_documents/mod.rs | 176 +++++++++++++++++- milli/src/update/index_documents/transform.rs | 21 +-- 14 files changed, 518 insertions(+), 18 deletions(-) create mode 100644 index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap diff --git a/Cargo.lock b/Cargo.lock index 5f192b6d1..a432908a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -359,6 +359,15 @@ dependencies = [ "backtrace", ] +[[package]] +name = "arbitrary" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d098ff73c1ca148721f37baad5ea6a465a13f9573aba8641fbbbae8164a54e" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "assert-json-diff" version = "2.0.2" @@ -1096,6 +1105,17 @@
dependencies = [ "syn 1.0.109", ] +[[package]] +name = "derive_arbitrary" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cdeb9ec472d588e539a818b2dee436825730da08ad0017c4b1a17676bdc8b7" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "derive_builder" version = "0.12.0" @@ -2711,6 +2731,7 @@ dependencies = [ name = "milli" version = "1.2.0" dependencies = [ + "arbitrary", "big_s", "bimap", "bincode", @@ -2722,6 +2743,7 @@ dependencies = [ "csv", "deserr", "either", + "fastrand", "filter-parser", "flatten-serde-json", "fst", diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap new file mode 100644 index 000000000..b27288a0f --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap @@ -0,0 +1,43 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_document_ids: 2, deleted_documents: Some(2) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"documentDeletion" [1,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap new file mode 100644 index 000000000..2b56b71d1 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap @@ -0,0 +1,9 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 3, + "doggo": "bork" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap new 
file mode 100644 index 000000000..d26e62bff --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap @@ -0,0 +1,37 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap new file mode 100644 index 000000000..e0f371120 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap @@ -0,0 +1,40 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"documentDeletion" [1,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: 
+---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap new file mode 100644 index 000000000..1d4aa24e2 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap @@ -0,0 +1,43 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} +1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [1,] +failed [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"documentDeletion" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap new file mode 100644 index 000000000..0f9dfd3e6 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap @@ -0,0 +1,46 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { 
received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} +1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [1,] +failed [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"documentDeletion" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap new file mode 100644 index 000000000..8204d059b --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap @@ -0,0 +1,17 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 1, + "doggo": "jean bob" + }, + { + "id": 2, + "catto": "jorts" + }, + { + "id": 3, + "doggo": "bork" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap new file mode 100644 index 000000000..5753db7e6 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentDeletion" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: 
+[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap new file mode 100644 index 000000000..0b6191f9e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap @@ -0,0 +1,40 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} +1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"documentDeletion" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/milli/Cargo.toml b/milli/Cargo.toml index de0f4e31d..ea48e008c 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -56,6 +56,7 @@ itertools = "0.10.5" log = "0.4.17" logging_timer = "1.1.0" csv = "1.2.1" +fastrand = "1.9.0" [dev-dependencies] mimalloc = { version = "0.1.29", default-features = false } @@ -64,12 +65,13 @@ insta = "1.29.0" maplit = "1.0.2" md5 = "0.7.0" rand = {version = "0.8.5", features = ["small_rng"] } +arbitrary = { version = "1.3.0", features = ["derive"] } [target.'cfg(fuzzing)'.dev-dependencies] fuzzcheck = "0.12.1" [features] -all-tokenizations = [ "charabia/default" ] +all-tokenizations = ["charabia/default"] # Use POSIX semaphores instead of SysV semaphores in LMDB # For more information on this feature, see heed's Cargo.toml diff --git a/milli/src/documents/mod.rs b/milli/src/documents/mod.rs index 43b31187d..7c037b3bf 100644 --- a/milli/src/documents/mod.rs +++ b/milli/src/documents/mod.rs @@ -111,7 +111,6 @@ pub enum Error { Io(#[from] io::Error), } -#[cfg(test)] pub fn objects_from_json_value(json: 
serde_json::Value) -> Vec<Object> { let documents = match json { object @ serde_json::Value::Object(_) => vec![object], @@ -141,7 +140,6 @@ macro_rules! documents { }}; } -#[cfg(test)] pub fn documents_batch_reader_from_objects( objects: impl IntoIterator<Item = Object>, ) -> DocumentsBatchReader<std::io::Cursor<Vec<u8>>> { diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index bbfa1d00c..406bfb0c9 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -198,6 +198,7 @@ where let number_of_documents = self.index.number_of_documents(self.wtxn)?; return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents }); } + let output = self .transform .take() @@ -220,6 +221,7 @@ where } let indexed_documents = output.documents_count as u64; + let number_of_documents = self.execute_raw(output)?; Ok(DocumentAdditionResult { indexed_documents, number_of_documents }) @@ -236,7 +238,7 @@ where primary_key, fields_ids_map, field_distribution, - mut external_documents_ids, + new_external_documents_ids, new_documents_ids, replaced_documents_ids, documents_count, @@ -363,9 +365,6 @@ where deletion_builder.delete_documents(&replaced_documents_ids); let deleted_documents_result = deletion_builder.execute_inner()?; debug!("{} documents actually deleted", deleted_documents_result.deleted_documents); - if !deleted_documents_result.soft_deletion_used { - external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?; - } } let index_documents_ids = self.index.documents_ids(self.wtxn)?; @@ -445,6 +444,9 @@ where self.index.put_primary_key(self.wtxn, &primary_key)?; // We write the external documents ids into the main database. + let mut external_documents_ids = self.index.external_documents_ids(self.wtxn)?; + external_documents_ids.insert_ids(&new_external_documents_ids)?; + let external_documents_ids = external_documents_ids.into_static(); self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?; let all_documents_ids = index_documents_ids | new_documents_ids; @@ -2515,4 +2517,170 @@ mod tests { db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f"); db_snap!(index, docid_word_positions, 3, @"5287245332627675740b28bd46e1cde1"); } + + #[test] + fn reproduce_the_bug() { + /* + [milli/examples/fuzz.rs:69] &batches = [ + Batch( + [ + AddDoc( + { "id": 1, "doggo": "bernese" }, => internal 0 + ), + ], + ), + Batch( + [ + DeleteDoc( + 1, => delete internal 0 + ), + AddDoc( + { "id": 0, "catto": "jorts" }, => internal 1 + ), + ], + ), + Batch( + [ + AddDoc( + { "id": 1, "catto": "jorts" }, => internal 2 + ), + ], + ), + ] + */ + let mut index = TempIndex::new(); + index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard; + + // START OF BATCH + + println!("--- ENTERING BATCH 1"); + + let mut wtxn = index.write_txn().unwrap(); + + let builder = IndexDocuments::new( + &mut wtxn, + &index, + &index.indexer_config, + index.index_documents_config.clone(), + |_| (), + || false, + ) + .unwrap(); + + // OP + + let documents = documents!([ + { "id": 1, "doggo": "bernese" }, + ]); + let (builder, added) = builder.add_documents(documents).unwrap(); + insta::assert_display_snapshot!(added.unwrap(), @"1"); + + // FINISHING + let addition = builder.execute().unwrap(); + insta::assert_debug_snapshot!(addition, @r###" + DocumentAdditionResult { + indexed_documents: 1, + number_of_documents: 1, + } + "###); + wtxn.commit().unwrap(); + + db_snap!(index, documents, @r###" + {"id":1,"doggo":"bernese"} + "###);
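+ // per the fuzz trace above, the external id "1" should map to the internal docid 0 at this point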
+ db_snap!(index, external_documents_ids, @r###" + soft: + hard: + 1 0 + "###); + + // A first batch of documents has been inserted + + // BATCH 2 + + println!("--- ENTERING BATCH 2"); + + let mut wtxn = index.write_txn().unwrap(); + + let builder = IndexDocuments::new( + &mut wtxn, + &index, + &index.indexer_config, + index.index_documents_config.clone(), + |_| (), + || false, + ) + .unwrap(); + + let (builder, removed) = builder.remove_documents(vec![S("1")]).unwrap(); + insta::assert_display_snapshot!(removed.unwrap(), @"1"); + + let documents = documents!([ + { "id": 0, "catto": "jorts" }, + ]); + let (builder, added) = builder.add_documents(documents).unwrap(); + insta::assert_display_snapshot!(added.unwrap(), @"1"); + + let addition = builder.execute().unwrap(); + insta::assert_debug_snapshot!(addition, @r###" + DocumentAdditionResult { + indexed_documents: 1, + number_of_documents: 1, + } + "###); + wtxn.commit().unwrap(); + + db_snap!(index, documents, @r###" + {"id":0,"catto":"jorts"} + "###); + + db_snap!(index, external_documents_ids, @r###" + soft: + hard: + 0 1 + "###); + + db_snap!(index, soft_deleted_documents_ids, @"[]"); + + // BATCH 3 + + println!("--- ENTERING BATCH 3"); + + let mut wtxn = index.write_txn().unwrap(); + + let builder = IndexDocuments::new( + &mut wtxn, + &index, + &index.indexer_config, + index.index_documents_config.clone(), + |_| (), + || false, + ) + .unwrap(); + + let documents = documents!([ + { "id": 1, "catto": "jorts" }, + ]); + let (builder, added) = builder.add_documents(documents).unwrap(); + insta::assert_display_snapshot!(added.unwrap(), @"1"); + + let addition = builder.execute().unwrap(); + insta::assert_debug_snapshot!(addition, @r###" + DocumentAdditionResult { + indexed_documents: 1, + number_of_documents: 2, + } + "###); + wtxn.commit().unwrap(); + + db_snap!(index, documents, @r###" + {"id":1,"catto":"jorts"} + {"id":0,"catto":"jorts"} + "###); + + // Ensuring all the returned IDs actually exist + let rtxn = index.read_txn().unwrap(); + let res = index.search(&rtxn).execute().unwrap(); + index.documents(&rtxn, res.documents_ids).unwrap(); + } } diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 6097278a7..e2a260391 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -21,15 +21,14 @@ use crate::error::{Error, InternalError, UserError}; use crate::index::{db_name, main_key}; use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep}; use crate::{ - ExternalDocumentsIds, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, - Result, BEU32, + FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, BEU32, }; pub struct TransformOutput { pub primary_key: String, pub fields_ids_map: FieldsIdsMap, pub field_distribution: FieldDistribution, - pub external_documents_ids: ExternalDocumentsIds<'static>, + pub new_external_documents_ids: fst::Map<Cow<'static, [u8]>>, pub new_documents_ids: RoaringBitmap, pub replaced_documents_ids: RoaringBitmap, pub documents_count: usize, @@ -58,8 +57,8 @@ pub struct Transform<'a, 'i> { original_sorter: grenad::Sorter<MergeFn>, flattened_sorter: grenad::Sorter<MergeFn>, - replaced_documents_ids: RoaringBitmap, - new_documents_ids: RoaringBitmap, + pub replaced_documents_ids: RoaringBitmap, + pub new_documents_ids: RoaringBitmap, // To increase the cache locality and decrease the heap usage we use compact smartstring.
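// (smartstring's compact layout stores short keys inline, so most external ids avoid a heap allocation)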
new_external_documents_ids_builder: FxHashMap<SmartString<smartstring::Compact>, u64>, documents_count: usize, @@ -568,8 +567,6 @@ impl<'a, 'i> Transform<'a, 'i> { }))? .to_string(); - let mut external_documents_ids = self.index.external_documents_ids(wtxn)?; - // We create a final writer to write the new documents in order from the sorter. let mut writer = create_writer( self.indexer_settings.chunk_compression_type, @@ -651,13 +648,14 @@ impl<'a, 'i> Transform<'a, 'i> { fst_new_external_documents_ids_builder.insert(key, value) })?; let new_external_documents_ids = fst_new_external_documents_ids_builder.into_map(); - external_documents_ids.insert_ids(&new_external_documents_ids)?; Ok(TransformOutput { primary_key, fields_ids_map: self.fields_ids_map, field_distribution, - external_documents_ids: external_documents_ids.into_static(), + new_external_documents_ids: new_external_documents_ids + .map_data(|c| Cow::Owned(c)) + .unwrap(), new_documents_ids: self.new_documents_ids, replaced_documents_ids: self.replaced_documents_ids, documents_count: self.documents_count, @@ -691,7 +689,8 @@ impl<'a, 'i> Transform<'a, 'i> { let new_external_documents_ids = { let mut external_documents_ids = self.index.external_documents_ids(wtxn)?; external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?; - external_documents_ids + // it is safe to get the hard document IDs + external_documents_ids.into_static().hard }; let documents_ids = self.index.documents_ids(wtxn)?; @@ -776,7 +775,7 @@ impl<'a, 'i> Transform<'a, 'i> { primary_key, fields_ids_map: new_fields_ids_map, field_distribution, - external_documents_ids: new_external_documents_ids.into_static(), + new_external_documents_ids, new_documents_ids: documents_ids, replaced_documents_ids: RoaringBitmap::default(), documents_count, From 7f619ff0e43708610fd00bb953ecef723c7bb16c Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 22 May 2023 10:33:49 +0200 Subject: [PATCH 24/73] get rid of the now unused soft_deletion_used parameter --- milli/src/update/delete_documents.rs | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 311f93f8f..10649b1bb 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -72,7 +72,6 @@ impl std::fmt::Display for DeletionStrategy { pub(crate) struct DetailedDocumentDeletionResult { pub deleted_documents: u64, pub remaining_documents: u64, - pub soft_deletion_used: bool, } impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { @@ -109,11 +108,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { Some(docid) } pub fn execute(self) -> Result<DocumentDeletionResult> { - let DetailedDocumentDeletionResult { - deleted_documents, - remaining_documents, - soft_deletion_used: _, - } = self.execute_inner()?; + let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } = + self.execute_inner()?; Ok(DocumentDeletionResult { deleted_documents, remaining_documents }) } @@ -134,7 +130,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { return Ok(DetailedDocumentDeletionResult { deleted_documents: 0, remaining_documents: 0, - soft_deletion_used: false, }); } @@ -150,7 +145,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { return Ok(DetailedDocumentDeletionResult { deleted_documents: current_documents_ids_len, remaining_documents, - soft_deletion_used: false, }); } @@ -219,7 +213,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { return Ok(DetailedDocumentDeletionResult { deleted_documents: self.to_delete_docids.len(), remaining_documents:
documents_ids.len(), - soft_deletion_used: true, }); } @@ -472,7 +465,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { Ok(DetailedDocumentDeletionResult { deleted_documents: self.to_delete_docids.len(), remaining_documents: documents_ids.len(), - soft_deletion_used: false, }) } From 602ad98cb8ceeb90a13067f94821381434722655 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 22 May 2023 11:15:14 +0200 Subject: [PATCH 25/73] improve the way we handle the fsts --- milli/Cargo.toml | 4 ++- milli/src/external_documents_ids.rs | 36 +++++++++++-------- milli/src/update/index_documents/mod.rs | 2 -- milli/src/update/index_documents/transform.rs | 12 +++---- 4 files changed, 30 insertions(+), 24 deletions(-) diff --git a/milli/Cargo.toml b/milli/Cargo.toml index ea48e008c..f708edc73 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -56,7 +56,6 @@ itertools = "0.10.5" log = "0.4.17" logging_timer = "1.1.0" csv = "1.2.1" -fastrand = "1.9.0" [dev-dependencies] mimalloc = { version = "0.1.29", default-features = false } @@ -65,7 +64,10 @@ insta = "1.29.0" maplit = "1.0.2" md5 = "0.7.0" rand = {version = "0.8.5", features = ["small_rng"] } + +# fuzzing arbitrary = { version = "1.3.0", features = ["derive"] } +fastrand = "1.9.0" [target.'cfg(fuzzing)'.dev-dependencies] fuzzcheck = "0.12.1" diff --git a/milli/src/external_documents_ids.rs b/milli/src/external_documents_ids.rs index 2cecd1abe..36b147336 100644 --- a/milli/src/external_documents_ids.rs +++ b/milli/src/external_documents_ids.rs @@ -106,22 +106,30 @@ impl<'a> ExternalDocumentsIds<'a> { map } + /// Return an fst of the combined hard and soft deleted IDs. + pub fn to_fst<'b>(&'b self) -> fst::Result<Cow<'b, fst::Map<Cow<'a, [u8]>>>> { + if self.soft.is_empty() { + return Ok(Cow::Borrowed(&self.hard)); + } + let union_op = self.hard.op().add(&self.soft).r#union(); + + let mut iter = union_op.into_stream(); + let mut new_hard_builder = fst::MapBuilder::memory(); + while let Some((external_id, marked_docids)) = iter.next() { + let value = indexed_last_value(marked_docids).unwrap(); + if value != DELETED_ID { + new_hard_builder.insert(external_id, value)?; + } + } + + drop(iter); + + Ok(Cow::Owned(new_hard_builder.into_map().map_data(Cow::Owned)?)) + } + fn merge_soft_into_hard(&mut self) -> fst::Result<()> { if self.soft.len() >= self.hard.len() / 2 { - let union_op = self.hard.op().add(&self.soft).r#union(); - - let mut iter = union_op.into_stream(); - let mut new_hard_builder = fst::MapBuilder::memory(); - while let Some((external_id, marked_docids)) = iter.next() { - let value = indexed_last_value(marked_docids).unwrap(); - if value != DELETED_ID { - new_hard_builder.insert(external_id, value)?; - } - } - - drop(iter); - - self.hard = new_hard_builder.into_map().map_data(Cow::Owned)?; + self.hard = self.to_fst()?.into_owned(); + self.soft = fst::Map::default().map_data(Cow::Owned)?; } diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 406bfb0c9..70ec377aa 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -198,7 +198,6 @@ where let number_of_documents = self.index.number_of_documents(self.wtxn)?; return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents }); } - let output = self .transform .take() @@ -221,7 +220,6 @@ where } let indexed_documents = output.documents_count as u64; - let number_of_documents = self.execute_raw(output)?; Ok(DocumentAdditionResult { indexed_documents, number_of_documents }) diff --git
a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index e2a260391..ee6831be5 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -57,8 +57,8 @@ pub struct Transform<'a, 'i> { original_sorter: grenad::Sorter<MergeFn>, flattened_sorter: grenad::Sorter<MergeFn>, - pub replaced_documents_ids: RoaringBitmap, - pub new_documents_ids: RoaringBitmap, + replaced_documents_ids: RoaringBitmap, + new_documents_ids: RoaringBitmap, // To increase the cache locality and decrease the heap usage we use compact smartstring. new_external_documents_ids_builder: FxHashMap<SmartString<smartstring::Compact>, u64>, documents_count: usize, @@ -653,9 +653,7 @@ impl<'a, 'i> Transform<'a, 'i> { primary_key, fields_ids_map: self.fields_ids_map, field_distribution, - new_external_documents_ids: new_external_documents_ids - .map_data(|c| Cow::Owned(c)) - .unwrap(), + new_external_documents_ids: new_external_documents_ids.map_data(Cow::Owned).unwrap(), new_documents_ids: self.new_documents_ids, replaced_documents_ids: self.replaced_documents_ids, documents_count: self.documents_count, @@ -689,8 +687,8 @@ impl<'a, 'i> Transform<'a, 'i> { let new_external_documents_ids = { let mut external_documents_ids = self.index.external_documents_ids(wtxn)?; external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?; - // it is safe to get the hard document IDs - external_documents_ids.into_static().hard + // This call should be free and can't fail since the previous method merged both fsts. + external_documents_ids.into_static().to_fst()?.into_owned() }; let documents_ids = self.index.documents_ids(wtxn)?; From 22213dc6044c32101a6e1b4d15640af71466f469 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 23 May 2023 09:14:26 +0200 Subject: [PATCH 26/73] push the fuzzer --- milli/examples/fuzz.rs | 116 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 milli/examples/fuzz.rs diff --git a/milli/examples/fuzz.rs b/milli/examples/fuzz.rs new file mode 100644 index 000000000..9536f3811 --- /dev/null +++ b/milli/examples/fuzz.rs @@ -0,0 +1,116 @@ +use arbitrary::{Arbitrary, Unstructured}; +use milli::heed::EnvOpenOptions; +use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig}; +use milli::Index; +use serde_json::{json, Value}; +use tempfile::TempDir; + +#[derive(Debug, Arbitrary)] +enum Document { + One, + Two, + Three, + Four, + Five, + Six, +} + +impl Document { + pub fn to_d(&self) -> Value { + match self { + Document::One => json!({ "id": 0, "doggo": "bernese" }), + Document::Two => json!({ "id": 0, "doggo": "golden" }), + Document::Three => json!({ "id": 0, "catto": "jorts" }), + Document::Four => json!({ "id": 1, "doggo": "bernese" }), + Document::Five => json!({ "id": 1, "doggo": "golden" }), + Document::Six => json!({ "id": 1, "catto": "jorts" }), + } + } +} + +#[derive(Debug, Arbitrary)] +enum DocId { + Zero, + One, +} + +impl DocId { + pub fn to_s(&self) -> String { + match self { + DocId::Zero => "0".to_string(), + DocId::One => "1".to_string(), + } + } +} + +#[derive(Debug, Arbitrary)] +enum Operation { + AddDoc(Document), + DeleteDoc(DocId), +} + +#[derive(Debug, Arbitrary)] +struct Batch([Operation; 2]); + +fn main() { + let mut options = EnvOpenOptions::new(); + options.map_size(1024 * 1024 * 1024 * 1024); + let _tempdir = TempDir::new_in("ramdisk").unwrap(); + let index = Index::new(options, _tempdir.path()).unwrap(); + let indexer_config = IndexerConfig::default(); + let index_documents_config =
IndexDocumentsConfig::default(); + + loop { + // let v: Vec<u8> = std::iter::repeat_with(|| fastrand::u8(..)).take(1000).collect(); + + // let data = Unstructured::new(&v); + // let batches = <[Batch; 3]>::arbitrary(&mut data).unwrap(); + let batches = [ + Batch([Operation::AddDoc(Document::Five), Operation::AddDoc(Document::Three)]), + Batch([Operation::DeleteDoc(DocId::One), Operation::AddDoc(Document::Two)]), + Batch([Operation::DeleteDoc(DocId::Zero), Operation::AddDoc(Document::Five)]), + ]; + + dbg!(&batches); + + let mut wtxn = index.write_txn().unwrap(); + + for batch in batches { + dbg!(&batch); + + let mut builder = IndexDocuments::new( + &mut wtxn, + &index, + &indexer_config, + index_documents_config.clone(), + |_| (), + || false, + ) + .unwrap(); + + for op in batch.0 { + match op { + Operation::AddDoc(doc) => { + let documents = milli::documents::objects_from_json_value(doc.to_d()); + let documents = + milli::documents::documents_batch_reader_from_objects(documents); + let (b, _added) = builder.add_documents(documents).unwrap(); + builder = b; + } + Operation::DeleteDoc(id) => { + let (b, _removed) = builder.remove_documents(vec![id.to_s()]).unwrap(); + builder = b; + } + } + } + builder.execute().unwrap(); + // wtxn.commit().unwrap(); + + // after executing a batch we check if the database is corrupted + // let rtxn = index.read_txn().unwrap(); + let res = index.search(&wtxn).execute().unwrap(); + index.documents(&wtxn, res.documents_ids).unwrap(); + } + wtxn.abort().unwrap(); + } } From 002f42875f1f852da092ecdce0a5ae9700ec08ca Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 23 May 2023 11:42:40 +0200 Subject: [PATCH 27/73] fix the fuzzer --- milli/examples/fuzz.rs | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/milli/examples/fuzz.rs b/milli/examples/fuzz.rs index 9536f3811..dc79e37b7 100644 --- a/milli/examples/fuzz.rs +++ b/milli/examples/fuzz.rs @@ -50,34 +50,27 @@ enum Operation { } #[derive(Debug, Arbitrary)] -struct Batch([Operation; 2]); +struct Batch([Operation; 5]); fn main() { let mut options = EnvOpenOptions::new(); options.map_size(1024 * 1024 * 1024 * 1024); - let _tempdir = TempDir::new_in("ramdisk").unwrap(); + let _tempdir = TempDir::new().unwrap(); let index = Index::new(options, _tempdir.path()).unwrap(); let indexer_config = IndexerConfig::default(); let index_documents_config = IndexDocumentsConfig::default(); loop { - // let v: Vec<u8> = std::iter::repeat_with(|| fastrand::u8(..)).take(1000).collect(); + let v: Vec<u8> = std::iter::repeat_with(|| fastrand::u8(..)).take(1000).collect(); - // let data = Unstructured::new(&v); - // let batches = <[Batch; 3]>::arbitrary(&mut data).unwrap(); - let batches = [ - Batch([Operation::AddDoc(Document::Five), Operation::AddDoc(Document::Three)]), - Batch([Operation::DeleteDoc(DocId::One), Operation::AddDoc(Document::Two)]), - Batch([Operation::DeleteDoc(DocId::Zero), Operation::AddDoc(Document::Five)]), - ]; + let mut data = Unstructured::new(&v); + let batches = <[Batch; 5]>::arbitrary(&mut data).unwrap(); dbg!(&batches); let mut wtxn = index.write_txn().unwrap(); for batch in batches { - dbg!(&batch); - let mut builder = IndexDocuments::new( &mut wtxn, &index, &indexer_config, index_documents_config.clone(), |_| (), || false, ) .unwrap(); From a490a11325cebfde7ead969c95ebc95af78e1352 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 23 May 2023 15:24:24 +0200 Subject: [PATCH 28/73] Add explanatory comment on the way we're recomputing costs --- milli/src/search/new/ranking_rule_graph/cheapest_paths.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git
a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs index 4a696b3dd..738b53016 100644 --- a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs +++ b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs @@ -235,6 +235,9 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { node_with_removed_outgoing_conditions: Interned<QueryNode>, costs: &mut MappedInterner<QueryNode, Vec<u64>>, ) { + // Traverse the graph backward from the target node, recomputing the cost for each of its predecessors. + // We first check that no other node is contributing the same total cost to a predecessor before removing + // the cost from the predecessor. self.traverse_breadth_first_backward(node_with_removed_outgoing_conditions, |cur_node| { let mut costs_to_remove = FxHashSet::default(); costs_to_remove.extend(costs.get(cur_node).iter().copied()); From 51043f78f0159ab18f7d64051aa076514b5019aa Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 23 May 2023 15:27:25 +0200 Subject: [PATCH 29/73] Remove trailing whitespace --- milli/src/search/new/ranking_rule_graph/cheapest_paths.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs index 738b53016..8fd943e6e 100644 --- a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs +++ b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs @@ -237,7 +237,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { ) { // Traverse the graph backward from the target node, recomputing the cost for each of its predecessors. // We first check that no other node is contributing the same total cost to a predecessor before removing - // the cost from the predecessor. + // the cost from the predecessor. self.traverse_breadth_first_backward(node_with_removed_outgoing_conditions, |cur_node| { let mut costs_to_remove = FxHashSet::default(); costs_to_remove.extend(costs.get(cur_node).iter().copied()); From 57d53de40255afd45ea1f6add7ecf32a3434bc97 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 24 May 2023 10:47:05 +0200 Subject: [PATCH 30/73] Increase the number of buckets --- meilisearch/src/metrics.rs | 15 +++++++++++---- meilisearch/src/routes/metrics.rs | 5 ++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/meilisearch/src/metrics.rs b/meilisearch/src/metrics.rs index f6fdf756d..c518db941 100644 --- a/meilisearch/src/metrics.rs +++ b/meilisearch/src/metrics.rs @@ -4,12 +4,19 @@ use prometheus::{ register_int_gauge_vec, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec, }; -const HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: &[f64; 14] = &[ - 0.0005, 0.0008, 0.00085, 0.0009, 0.00095, 0.001, 0.00105, 0.0011, 0.00115, 0.0012, 0.0015, - 0.002, 0.003, 1.0, -]; +/// Create evenly distributed buckets +fn create_buckets<const N: usize>() -> [f64; N] { + let mut array = [0.0; N]; + + for i in 0..N { + array[i] = ((i + 1) as f64) / N as f64; + } + + array +} lazy_static!
{ + pub static ref HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: [f64; 100] = create_buckets(); pub static ref HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!( opts!("http_requests_total", "HTTP requests total"), &["method", "path"] diff --git a/meilisearch/src/routes/metrics.rs b/meilisearch/src/routes/metrics.rs index 874a1a5a5..83cd50542 100644 --- a/meilisearch/src/routes/metrics.rs +++ b/meilisearch/src/routes/metrics.rs @@ -17,7 +17,7 @@ pub fn configure(config: &mut web::ServiceConfig) { pub async fn get_metrics( index_scheduler: GuardedData, Data>, - auth_controller: GuardedData, Data>, + auth_controller: Data, ) -> Result { let auth_filters = index_scheduler.filters(); if !auth_filters.all_indexes_authorized() { @@ -28,8 +28,7 @@ pub async fn get_metrics( return Err(error); } - let response = - create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), auth_filters)?; + let response = create_all_stats((*index_scheduler).clone(), auth_controller, auth_filters)?; crate::metrics::MEILISEARCH_DB_SIZE_BYTES.set(response.database_size as i64); crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64); From ca99bc31885c7b78b22f0a343d0968f82ab98018 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 24 May 2023 11:29:20 +0200 Subject: [PATCH 31/73] implement the missing document filter error code when deleting documents --- meilisearch-types/src/deserr/mod.rs | 1 + meilisearch-types/src/error.rs | 1 + meilisearch/src/routes/indexes/documents.rs | 2 +- meilisearch/tests/documents/errors.rs | 12 ++++++++++++ 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/meilisearch-types/src/deserr/mod.rs b/meilisearch-types/src/deserr/mod.rs index 3e6ec8b96..bbaa42dc0 100644 --- a/meilisearch-types/src/deserr/mod.rs +++ b/meilisearch-types/src/deserr/mod.rs @@ -150,6 +150,7 @@ make_missing_field_convenience_builder!(MissingApiKeyActions, missing_api_key_ac make_missing_field_convenience_builder!(MissingApiKeyExpiresAt, missing_api_key_expires_at); make_missing_field_convenience_builder!(MissingApiKeyIndexes, missing_api_key_indexes); make_missing_field_convenience_builder!(MissingSwapIndexes, missing_swap_indexes); +make_missing_field_convenience_builder!(MissingDocumentFilter, missing_document_filter); // Integrate a sub-error into a [`DeserrError`] by taking its error message but using // the default error code (C) from `Self` diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index bcd8320c9..a88ff307c 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -214,6 +214,7 @@ InvalidApiKeyUid , InvalidRequest , BAD_REQUEST ; InvalidContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ; InvalidDocumentFields , InvalidRequest , BAD_REQUEST ; +MissingDocumentFilter , InvalidRequest , BAD_REQUEST ; InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ; InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ; InvalidDocumentId , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index b2b818e4b..4810f1c3d 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -486,7 +486,7 @@ pub async fn delete_documents_batch( #[derive(Debug, Deserr)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct DocumentDeletionByFilter { - #[deserr(error = DeserrJsonError)] + #[deserr(error = 
DeserrJsonError<InvalidDocumentDeleteFilter>, missing_field_error = DeserrJsonError::missing_document_filter)] filter: Value, } diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index 0210d1bb2..8340ea326 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -577,6 +577,18 @@ async fn delete_document_by_filter() { } "###); + // do not send any filter + let (response, code) = index.delete_document_by_filter(json!({})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Missing field `filter`", + "code": "missing_document_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_document_filter" + } + "###); + // index does not exist let (response, code) = index.delete_document_by_filter(json!({ "filter": "doggo = bernese"})).await; From b9dd092a628898df1724b69e95925c75e1cdddd7 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 24 May 2023 11:48:22 +0200 Subject: [PATCH 32/73] make the details return null in the originalFilter field if no filter was provided + add a big test on the details --- meilisearch/src/routes/tasks.rs | 9 +-- meilisearch/tests/tasks/mod.rs | 109 ++++++++++++++++++++++++++++++-- 2 files changed, 108 insertions(+), 10 deletions(-) diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index cab0f7197..4a2656982 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -99,7 +99,7 @@ pub struct DetailsView { #[serde(skip_serializing_if = "Option::is_none")] pub deleted_tasks: Option<Option<u64>>, #[serde(skip_serializing_if = "Option::is_none")] - pub original_filter: Option<String>, + pub original_filter: Option<Option<String>>, #[serde(skip_serializing_if = "Option::is_none")] pub dump_uid: Option<Option<String>>, #[serde(skip_serializing_if = "Option::is_none")] @@ -131,12 +131,13 @@ impl From<Details>
for DetailsView { } => DetailsView { provided_ids: Some(received_document_ids), deleted_documents: Some(deleted_documents), + original_filter: Some(None), ..DetailsView::default() }, Details::DocumentDeletionByFilter { original_filter, deleted_documents } => { DetailsView { provided_ids: Some(0), - original_filter: Some(original_filter), + original_filter: Some(Some(original_filter)), deleted_documents: Some(deleted_documents), ..DetailsView::default() } @@ -148,7 +149,7 @@ impl From<Details>
for DetailsView { DetailsView { matched_tasks: Some(matched_tasks), canceled_tasks: Some(canceled_tasks), - original_filter: Some(original_filter), + original_filter: Some(Some(original_filter)), ..DetailsView::default() } } @@ -156,7 +157,7 @@ impl From<Details>
for DetailsView { DetailsView { matched_tasks: Some(matched_tasks), deleted_tasks: Some(deleted_tasks), - original_filter: Some(original_filter), + original_filter: Some(Some(original_filter)), ..DetailsView::default() } } diff --git a/meilisearch/tests/tasks/mod.rs b/meilisearch/tests/tasks/mod.rs index e9b5a2325..4ac134871 100644 --- a/meilisearch/tests/tasks/mod.rs +++ b/meilisearch/tests/tasks/mod.rs @@ -413,7 +413,7 @@ async fn test_summarized_document_addition_or_update() { } #[actix_web::test] -async fn test_summarized_delete_batch() { +async fn test_summarized_delete_documents_by_batch() { let server = Server::new().await; let index = server.index("test"); index.delete_batch(vec![1, 2, 3]).await; @@ -430,7 +430,8 @@ async fn test_summarized_delete_batch() { "canceledBy": null, "details": { "providedIds": 3, - "deletedDocuments": 0 + "deletedDocuments": 0, + "originalFilter": null }, "error": { "message": "Index `test` not found.", @@ -460,7 +461,8 @@ async fn test_summarized_delete_batch() { "canceledBy": null, "details": { "providedIds": 1, - "deletedDocuments": 0 + "deletedDocuments": 0, + "originalFilter": null }, "error": null, "duration": "[duration]", @@ -472,7 +474,100 @@ async fn test_summarized_delete_batch() { } #[actix_web::test] -async fn test_summarized_delete_document() { +async fn test_summarized_delete_documents_by_filter() { + let server = Server::new().await; + let index = server.index("test"); + + index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; + index.wait_task(0).await; + let (task, _) = index.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "failed", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 0, + "deletedDocuments": 0, + "originalFilter": "\"doggo = bernese\"" + }, + "error": { + "message": "Index `test` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.create(None).await; + index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; + index.wait_task(2).await; + let (task, _) = index.get_task(2).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 2, + "indexUid": "test", + "status": "failed", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 0, + "deletedDocuments": 0, + "originalFilter": "\"doggo = bernese\"" + }, + "error": { + "message": "Attribute `doggo` is not filterable. 
This index does not have configured filterable attributes.\n1:6 doggo = bernese", + "code": "invalid_document_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_filter" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.update_settings(json!({ "filterableAttributes": ["doggo"] })).await; + index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; + index.wait_task(4).await; + let (task, _) = index.get_task(4).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 4, + "indexUid": "test", + "status": "succeeded", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 0, + "deletedDocuments": 0, + "originalFilter": "\"doggo = bernese\"" + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_delete_document_by_id() { let server = Server::new().await; let index = server.index("test"); index.delete_document(1).await; @@ -489,7 +584,8 @@ async fn test_summarized_delete_document() { "canceledBy": null, "details": { "providedIds": 1, - "deletedDocuments": 0 + "deletedDocuments": 0, + "originalFilter": null }, "error": { "message": "Index `test` not found.", @@ -519,7 +615,8 @@ async fn test_summarized_delete_document() { "canceledBy": null, "details": { "providedIds": 1, - "deletedDocuments": 0 + "deletedDocuments": 0, + "originalFilter": null }, "error": null, "duration": "[duration]", From 9111f5176f8c1a4a66518d9450a3431fd8a34a2a Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 24 May 2023 11:53:16 +0200 Subject: [PATCH 33/73] get rid of the invalid document delete filter in favor of the invalid document filter --- meilisearch-types/src/error.rs | 1 - meilisearch/src/error.rs | 2 +- meilisearch/src/routes/indexes/documents.rs | 6 +++--- meilisearch/tests/documents/errors.rs | 12 ++++++------ 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index a88ff307c..1509847b7 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -220,7 +220,6 @@ InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ; InvalidDocumentId , InvalidRequest , BAD_REQUEST ; InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ; InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ; -InvalidDocumentDeleteFilter , InvalidRequest , BAD_REQUEST ; InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 004f0d143..ca10c4593 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -61,7 +61,7 @@ impl ErrorCode for MeilisearchHttpError { MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload, MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType, MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound, - MeilisearchHttpError::EmptyFilter => Code::InvalidDocumentDeleteFilter, + MeilisearchHttpError::EmptyFilter => Code::InvalidDocumentFilter, MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter, MeilisearchHttpError::PayloadTooLarge(_) => 
Code::PayloadTooLarge,
             MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes,
diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs
index 4810f1c3d..2afc1b5fb 100644
--- a/meilisearch/src/routes/indexes/documents.rs
+++ b/meilisearch/src/routes/indexes/documents.rs
@@ -486,7 +486,7 @@ pub async fn delete_documents_batch(
 #[derive(Debug, Deserr)]
 #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 pub struct DocumentDeletionByFilter {
-    #[deserr(error = DeserrJsonError<InvalidDocumentDeleteFilter>, missing_field_error = DeserrJsonError::missing_document_filter)]
+    #[deserr(error = DeserrJsonError<InvalidDocumentFilter>, missing_field_error = DeserrJsonError::missing_document_filter)]
     filter: Value,
 }

@@ -508,8 +508,8 @@ pub async fn delete_documents_by_filter(
         || -> Result<_, ResponseError> {
             Ok(crate::search::parse_filter(&filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?)
         }()
-        // and whatever was the error, the error code should always be an InvalidDocumentDeleteFilter
-        .map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentDeleteFilter))?;
+        // and whatever was the error, the error code should always be an InvalidDocumentFilter
+        .map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;

     let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
     let task: SummarizedTaskView =
diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs
index 8340ea326..7dab16a25 100644
--- a/meilisearch/tests/documents/errors.rs
+++ b/meilisearch/tests/documents/errors.rs
@@ -547,9 +547,9 @@ async fn delete_document_by_filter() {
     snapshot!(json_string!(response), @r###"
     {
       "message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
-      "code": "invalid_document_delete_filter",
+      "code": "invalid_document_filter",
       "type": "invalid_request",
-      "link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter"
+      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
     }
     "###);

@@ -559,9 +559,9 @@ async fn delete_document_by_filter() {
     snapshot!(json_string!(response), @r###"
     {
       "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
-      "code": "invalid_document_delete_filter",
+      "code": "invalid_document_filter",
       "type": "invalid_request",
-      "link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter"
+      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
     }
     "###);

@@ -571,9 +571,9 @@ async fn delete_document_by_filter() {
     snapshot!(json_string!(response), @r###"
     {
       "message": "Sending an empty filter is forbidden.",
-      "code": "invalid_document_delete_filter",
+      "code": "invalid_document_filter",
       "type": "invalid_request",
-      "link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter"
+      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
     }
     "###);
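A quick illustration of what the patch above changes for API consumers (a sketch, not part of the series; the endpoint path and port are the v1.2 defaults these tests appear to exercise):

    curl -X POST 'http://localhost:7700/indexes/test/documents/delete' \
      -H 'Content-Type: application/json' \
      --data-binary '{ "filter": true }'
    # the error payload now reports "code": "invalid_document_filter"
    # where it previously reported "invalid_document_delete_filter"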
From 3c5d1c93de3a5f5014ac50ab99a94ad81562f759 Mon Sep 17 00:00:00 2001
From: TATHAGATA ROY <98920199+roy9495@users.noreply.github.com>
Date: Thu, 11 May 2023 20:28:04 +0000
Subject: [PATCH 34/73] Added a cron test for disabled all-tokenization

---
 .github/workflows/test-suite.yml | 23 +++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml
index 5a64feda8..e705cde5c 100644
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@@ -105,6 +105,29 @@ jobs:
           command: test
           args: --workspace --locked --release --all-features

+  test-disabled-tokenization:
+    name: Test Disabled Tokenization
+    runs-on: ubuntu-latest
+    container:
+      image: ubuntu:18.04
+    if: github.event_name == 'schedule'
+    steps:
+    - uses: actions/checkout@v3
+    - name: Install needed dependencies
+      run: |
+        apt-get update
+        apt-get install --assume-yes build-essential curl
+    - uses: actions-rs/toolchain@v1
+      with:
+        toolchain: stable
+        override: true
+    - name: Run cargo tree without default features
+      run: |
+        cargo tree -f '{p} {f}' -e normal --no-default-features | grep lindera -vqz
+    - name: Run cargo tree with default features
+      run: |
+        cargo tree -f '{p} {f}' -e normal | grep lindera -qz
+
 # We run tests in debug also, to make sure that the debug_assertions are hit
 test-debug:
   name: Run tests in debug

From 44f231d41ea1ebd349240413d4d38a05ea961d79 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mentine=20U=2E=20-=20curqui?=
Date: Thu, 25 May 2023 10:02:57 +0200
Subject: [PATCH 35/73] Update .github/workflows/test-suite.yml

---
 .github/workflows/test-suite.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml
index e705cde5c..cce376877 100644
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@@ -106,7 +106,7 @@ jobs:
         args: --workspace --locked --release --all-features

   test-disabled-tokenization:
-    name: Test Disabled Tokenization
+    name: Test disabled tokenization
     runs-on: ubuntu-latest
     container:
       image: ubuntu:18.04

From ead07d0b9d4a70153bdd81ce6ed3c9a12168b551 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mentine=20U=2E=20-=20curqui?=
Date: Thu, 25 May 2023 10:03:45 +0200
Subject: [PATCH 36/73] Update .github/workflows/test-suite.yml

---
 .github/workflows/test-suite.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml
index cce376877..b16c2f6b5 100644
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@@ -121,7 +121,7 @@ jobs:
       with:
         toolchain: stable
         override: true
-    - name: Run cargo tree without default features
+    - name: Run cargo tree without default features and check lindera is not present
       run: |
         cargo tree -f '{p} {f}' -e normal --no-default-features | grep lindera -vqz
     - name: Run cargo tree with default features
       run: |
         cargo tree -f '{p} {f}' -e normal | grep lindera -qz

From 840727d76ff6837087a9859ea7d5e628c324ba6c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mentine=20U=2E=20-=20curqui?=
Date: Thu, 25 May 2023 10:04:07 +0200
Subject: [PATCH 37/73] Update .github/workflows/test-suite.yml

---
 .github/workflows/test-suite.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml
index b16c2f6b5..4319624dd 100644
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@@ -124,7 +124,7 @@ jobs:
     - name: Run cargo tree without default features and check lindera is not present
       run: |
         cargo tree -f '{p} {f}' -e normal --no-default-features | grep lindera -vqz
-    - name: Run cargo tree with default features
+    - name: Run cargo tree with default features and check lindera is present
       run: |
         cargo tree -f '{p} {f}' -e normal | grep lindera -qz
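A note on the `grep` idiom the four patches above rely on (my reading of the flags, not text from the commits): `-z` makes grep treat the whole `cargo tree` output as one NUL-delimited record, so `-v` checks that `lindera` is absent from the entire output rather than from any single line, and `-q` suppresses output so only the exit status drives the step's pass/fail:

    # exits 0 (step passes) only if lindera appears nowhere in the dependency tree
    cargo tree -f '{p} {f}' -e normal --no-default-features | grep lindera -vqz
    # exits 0 (step passes) only if lindera appears somewhere in the dependency tree
    cargo tree -f '{p} {f}' -e normal | grep lindera -qz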
From 1b601f70c671969a0db05eee65e4c911b9d91716 Mon Sep 17 00:00:00 2001
From: Tamo
Date: Thu, 25 May 2023 11:08:16 +0200
Subject: [PATCH 38/73] increase the bucketing of requests

---
 meilisearch/src/metrics.rs | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/meilisearch/src/metrics.rs b/meilisearch/src/metrics.rs
index c518db941..7ee5241db 100644
--- a/meilisearch/src/metrics.rs
+++ b/meilisearch/src/metrics.rs
@@ -5,18 +5,18 @@ use prometheus::{
 };

 /// Create evenly distributed buckets
-fn create_buckets<const N: usize>() -> [f64; N] {
-    let mut array = [0.0; N];
-
-    for i in 0..N {
-        array[i] = ((i + 1) as f64) / N as f64;
-    }
-
-    array
+fn create_buckets() -> [f64; 29] {
+    (0..10)
+        .chain((10..100).step_by(10))
+        .chain((100..=1000).step_by(100))
+        .map(|i| i as f64 / 1000.)
+        .collect::<Vec<_>>()
+        .try_into()
+        .unwrap()
 }

 lazy_static! {
-    pub static ref HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: [f64; 100] = create_buckets();
+    pub static ref HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: [f64; 29] = create_buckets();
     pub static ref HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
         opts!("http_requests_total", "HTTP requests total"),
         &["method", "path"]

From 11b95284cdf26fe1b14cccf31cab8055e9508fc8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mentine=20U=2E=20-=20curqui?=
Date: Thu, 25 May 2023 11:48:26 +0200
Subject: [PATCH 39/73] Revert "Improve docker cache"

---
 .dockerignore                               | 1 -
 .github/workflows/publish-docker-images.yml | 7 -------
 Dockerfile                                  | 5 ++---
 3 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/.dockerignore b/.dockerignore
index ef2e39bfc..8c6bdbdeb 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -2,4 +2,3 @@ target
 Dockerfile
 .dockerignore
 .gitignore
-**/.git

diff --git a/.github/workflows/publish-docker-images.yml b/.github/workflows/publish-docker-images.yml
index fa1f145da..9ceeaaaa4 100644
--- a/.github/workflows/publish-docker-images.yml
+++ b/.github/workflows/publish-docker-images.yml
@@ -58,13 +58,9 @@ jobs:

       - name: Set up QEMU
         uses: docker/setup-qemu-action@v2
-        with:
-          platforms: linux/amd64,linux/arm64

       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v2
-        with:
-          platforms: linux/amd64,linux/arm64

       - name: Login to Docker Hub
         uses: docker/login-action@v2
@@ -92,13 +88,10 @@ jobs:
           push: true
           platforms: linux/amd64,linux/arm64
           tags: ${{ steps.meta.outputs.tags }}
-          builder: ${{ steps.buildx.outputs.name }}
           build-args: |
             COMMIT_SHA=${{ github.sha }}
             COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
             GIT_TAG=${{ github.ref_name }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max

       # /!\ Don't touch this without checking with Cloud team
       - name: Send CI information to Cloud team

diff --git a/Dockerfile b/Dockerfile
index 0d7593f79..70950f338 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,3 @@
-# syntax=docker/dockerfile:1.4
 # Compile
 FROM rust:alpine3.16 AS compiler

@@ -12,7 +11,7 @@ ARG GIT_TAG
 ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
 ENV RUSTFLAGS="-C target-feature=-crt-static"

-COPY --link . .
+COPY . .
 RUN set -eux; \
     apkArch="$(apk --print-arch)"; \
     if [ "$apkArch" = "aarch64" ]; then \
@@ -31,7 +30,7 @@ RUN apk update --quiet \

 # add meilisearch to the `/bin` so you can run it from anywhere and it's easy
 # to find.
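# (Gloss on the lines below, not part of the revert: `COPY --link` is a
# BuildKit feature gated behind the `# syntax=docker/dockerfile:1.4` header
# removed above; it copies files into an independent layer, so the copied
# layer stays reusable even when earlier layers are invalidated, e.g.
#   COPY --from=compiler --link /meilisearch/target/release/meilisearch /bin/meilisearch
# Together with the dropped `cache-from`/`cache-to: type=gha` workflow keys,
# that caching machinery is what this patch undoes.)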
-COPY --from=compiler --link /meilisearch/target/release/meilisearch /bin/meilisearch +COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch # To stay compatible with the older version of the container (pre v0.27.0) we're # going to symlink the meilisearch binary in the path to `/meilisearch` RUN ln -s /bin/meilisearch /meilisearch From 2db09725f8cff6cc36d060777bd756fc94b0f4a2 Mon Sep 17 00:00:00 2001 From: curquiza Date: Thu, 25 May 2023 12:16:38 +0200 Subject: [PATCH 40/73] Improve SDK CI to choose the Docker image --- .github/workflows/sdks-tests.yml | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/workflows/sdks-tests.yml b/.github/workflows/sdks-tests.yml index 617b446d1..c3b2c5d87 100644 --- a/.github/workflows/sdks-tests.yml +++ b/.github/workflows/sdks-tests.yml @@ -3,6 +3,11 @@ name: SDKs tests on: workflow_dispatch: + inputs: + docker_image: + description: 'The Meilisearch Docker image used' + required: false + default: nightly schedule: - cron: "0 6 * * MON" # Every Monday at 6:00AM @@ -17,7 +22,7 @@ jobs: runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:nightly + image: getmeili/meilisearch:${{ github.event.inputs.docker_image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -51,7 +56,7 @@ jobs: runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:nightly + image: getmeili/meilisearch:${{ github.event.inputs.docker_image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -77,7 +82,7 @@ jobs: runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:nightly + image: getmeili/meilisearch:${{ github.event.inputs.docker_image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -107,7 +112,7 @@ jobs: runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:nightly + image: getmeili/meilisearch:${{ github.event.inputs.docker_image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -131,7 +136,7 @@ jobs: runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:nightly + image: getmeili/meilisearch:${{ github.event.inputs.docker_image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -160,7 +165,7 @@ jobs: runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:nightly + image: getmeili/meilisearch:${{ github.event.inputs.docker_image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -184,7 +189,7 @@ jobs: runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:nightly + image: getmeili/meilisearch:${{ github.event.inputs.docker_image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} From c433bdd1cd11f18f75358141556c9224589cce51 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 25 May 2023 12:58:13 +0200 Subject: [PATCH 41/73] add a view for the task queue in the metrics --- grafana-dashboards/dashboard.json | 2275 +++++++++++++++++------------ index-scheduler/src/lib.rs | 30 +- meilisearch/src/metrics.rs | 3 + meilisearch/src/routes/metrics.rs | 6 + 4 files changed, 1352 insertions(+), 962 deletions(-) diff --git a/grafana-dashboards/dashboard.json b/grafana-dashboards/dashboard.json index 
53865ad56..015831173 100644 --- a/grafana-dashboards/dashboard.json +++ b/grafana-dashboards/dashboard.json @@ -1,1007 +1,1360 @@ { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 4, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 14, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "refId": "A" } - ] + ], + "title": "Web application metrics", + "type": "row" }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 1, - "links": [], - "liveNow": false, - "panels": [ - { - "collapsed": false, - "datasource": { - "type": "prometheus" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 14, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "i51CxikVz" - }, - "refId": "A" - } - ], - "title": "Web application metrics", - "type": "row" + { + "datasource": { + "type": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-YlBl" - }, - "decimals": 0, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-YlBl" }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 0, - "y": 1 - }, - "id": 2, - "interval": "5s", - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "9.0.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "meilisearch_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}", - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Database Size", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "purple", - "mode": "fixed" - }, - "decimals": 0, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 4, - "y": 1 - }, - "id": 22, - "interval": "5s", - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - 
"pluginVersion": "9.0.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "meilisearch_index_count{job=\"meilisearch\", instance=\"$instance\"}", - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Indexes Count", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 8, - "y": 1 - }, - "id": 18, - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "9.0.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "expr": "meilisearch_index_docs_count{job=\"meilisearch\", index=\"$Index\", instance=\"$instance\"}", - "hide": false, - "range": true, - "refId": "A" - } - ], - "title": "Total Documents", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 12, - "y": 1 - }, - "id": 19, - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "9.0.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "round(increase(http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[1h]))", - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Total Searches (1h)", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 16, - "y": 1 - }, - "id": 20, - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "9.0.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "round(increase(http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[24h]))", - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Total Searches (24h)", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - 
"thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 20, - "y": 1 - }, - "id": 21, - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "9.0.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "round(increase(http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[30d]))", - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Total Searches (30d)", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-YlBl" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 15, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 2, + "interval": "5s", + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "meilisearch_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Database Size", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "purple", + "mode": "fixed" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 22, + "interval": "5s", + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "meilisearch_index_count{job=\"meilisearch\", instance=\"$instance\"}", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Indexes Count", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ 
+ { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 18, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "expr": "meilisearch_index_docs_count{job=\"meilisearch\", index=\"$Index\", instance=\"$instance\"}", + "hide": false, + "range": true, + "refId": "A" + } + ], + "title": "Total Documents", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 19, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "round(increase(http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[1h]))", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Total Searches (1h)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 20, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "round(increase(http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[24h]))", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Total Searches (24h)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 21, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "round(increase(http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[30d]))", + "interval": "", + "legendFormat": "", + 
"range": true, + "refId": "A" + } + ], + "title": "Total Searches (30d)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-YlBl" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 0, - "y": 7 - }, - "id": 1, - "interval": "5s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "rate(http_requests_total{instance=\"$instance\", job=\"meilisearch\"}[5m])", - "interval": "", - "legendFormat": "{{method}} {{path}}", - "range": true, - "refId": "A" - } - ], - "title": "HTTP requests per second (All Indexes)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-YlBl" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 15, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 12, - "y": 7 - }, - "id": 3, - "interval": "5s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right" - }, - "tooltip": { - "mode": "single", - "sort": "none" + ] } }, - "pluginVersion": "8.1.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "rate(http_response_time_seconds_sum{instance=\"$instance\", job=\"meilisearch\"}[5m]) / rate(http_response_time_seconds_count[5m])", - "interval": "", - "legendFormat": "{{method}} {{path}}", - "range": true, - "refId": "A" - } - ], - "title": "Mean response time (All Indexes)", - "type": "timeseries" + "overrides": [] }, - { - "cards": {}, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 1, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + 
"displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "rate(http_requests_total{instance=\"$instance\", job=\"meilisearch\"}[5m])", + "interval": "", + "legendFormat": "{{method}} {{path}}", + "range": true, + "refId": "A" + } + ], + "title": "HTTP requests per second (All Indexes)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-YlBl" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 3, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.4", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "rate(http_response_time_seconds_sum{instance=\"$instance\", job=\"meilisearch\"}[5m]) / rate(http_response_time_seconds_count[5m])", + "interval": "", + "legendFormat": "{{method}} {{path}}", + "range": true, + "refId": "A" + } + ], + "title": "Mean response time (All Indexes)", + "type": "timeseries" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateBlues", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 18 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 16, + "legend": { + "show": false + }, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": {}, "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateBlues", "exponent": 0.5, - "mode": "spectrum" + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Blues", + "steps": 128 }, - "dataFormat": "tsbuckets", - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" + "exemplars": { + "color": "rgba(255,0,255,0.7)" }, - "gridPos": { - "h": 12, - "w": 24, - "x": 0, - "y": 18 + "filterValues": { + "le": 1e-9 }, - "heatmap": {}, - "hideZeroBuckets": false, - "highlightCards": true, - "id": 16, "legend": { "show": false }, - "pluginVersion": "8.1.4", - "reverseYBuckets": false, - "targets": [ - { - "datasource": 
{ - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "sum by(le) (increase(http_response_time_seconds_bucket{path=\"/indexes/$Index/search\", instance=\"$instance\", job=\"meilisearch\"}[30s]))", - "format": "heatmap", - "interval": "", - "legendFormat": "{{le}}", - "range": true, - "refId": "A" - } - ], - "title": "Response time distribution over time (`POST /indexes/:index/search`)", + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", "tooltip": { "show": true, - "showHistogram": false + "yHistogram": false }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": 10, "yAxis": { + "axisPlacement": "left", "decimals": 2, - "format": "s", - "logBase": 1, - "show": true - }, - "yBucketBound": "auto", - "yBucketNumber": 10 + "reverse": false, + "unit": "s" + } }, - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "i51CxikVz" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 12, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "i51CxikVz" - }, - "refId": "A" - } - ], - "title": "System metrics", - "type": "row" + "pluginVersion": "9.5.2", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "sum by(le) (increase(http_response_time_seconds_bucket{path=\"/indexes/$Index/search\", instance=\"$instance\", job=\"meilisearch\"}[30s]))", + "format": "heatmap", + "interval": "", + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "Response time distribution over time (`POST /indexes/:index/search`)", + "tooltip": { + "show": true, + "showHistogram": false }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-YlBl" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 15, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 0, - "y": 31 - }, - "id": 4, - "interval": "5s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "rate(process_cpu_seconds_total{job=\"meilisearch\", instance=\"$instance\"}[1m])", - "interval": "", - "legendFormat": "process", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "exemplar": true, - "expr": "sum(rate(container_cpu_usage_seconds_total{name='mongodb-redis'}[1m])) by (name)", - "interval": "", - "legendFormat": "container", - "refId": "B" - } - ], - "title": "CPU usage", - "type": "timeseries" + "type": "heatmap", + "xAxis": { 
+ "show": true }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-YlBl" + "xBucketNumber": 10, + "yAxis": { + "decimals": 2, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto", + "yBucketNumber": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "bb3298a4-9acf-4da1-b86a-813f29f50888" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "MiB", - "axisPlacement": "left", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 15, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 12, - "y": 31 - }, - "id": 5, - "interval": "5s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "unit": "none" }, - "targets": [ + "overrides": [ { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "succeeded " + ], + "prefix": "All except:", + "readOnly": true + } }, - "editorMode": "builder", - "exemplar": true, - "expr": "process_resident_memory_bytes{job=\"meilisearch\", instance=\"$instance\"} / 1024 / 1024", - "interval": "", - "legendFormat": "process", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "exemplar": true, - "expr": "container_memory_usage_bytes{name=\"mongodb-redis\"} / 1024 / 1024", - "interval": "", - "legendFormat": "container", - "refId": "B" + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] } - ], - "title": "Memory usage", - "type": "timeseries" - } - ], - "refresh": "5s", - "schemaVersion": 36, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "localhost:7700", - "value": "localhost:7700" - }, - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "definition": "label_values(instance)", - "hide": 0, - "includeAll": false, - "label": "Instance", - "multi": false, - "name": "instance", - "options": [], - "query": { - "query": 
"label_values(instance)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 30 + }, + "id": 23, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true }, - { - "current": { - "selected": false, - "text": "movie-collection", - "value": "movie-collection" - }, - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "definition": "label_values(index)", - "hide": 0, - "includeAll": false, - "label": "index", - "multi": false, - "name": "Index", - "options": [], - "query": { - "query": "label_values(index)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" + "tooltip": { + "mode": "single", + "sort": "none" } - ] + }, + "pluginVersion": "8.1.4", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"statuses\"}", + "interval": "", + "legendFormat": "{{value}} ", + "range": true, + "refId": "A" + } + ], + "title": "Number of tasks by statuses", + "type": "timeseries" }, - "time": { - "from": "now-15m", - "to": "now" + { + "datasource": { + "type": "prometheus", + "uid": "bb3298a4-9acf-4da1-b86a-813f29f50888" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 30 + }, + "id": 24, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.4", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"types\"}", + "interval": "", + "legendFormat": "{{value}} ", + "range": true, + "refId": "A" + } + ], + "title": "Number of tasks by types", + "type": "timeseries" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m" - ] + { + "datasource": { + "type": "prometheus", + "uid": "bb3298a4-9acf-4da1-b86a-813f29f50888" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 
1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 25, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.4", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"indexes\"}", + "interval": "", + "legendFormat": "{{value}} ", + "range": true, + "refId": "A" + } + ], + "title": "Number of tasks by indexes", + "type": "timeseries" }, - "timezone": "", - "title": "Meilisearch", - "uid": "7wcZ94dnz", - "version": 47, - "weekStart": "" - } \ No newline at end of file + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "i51CxikVz" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 52 + }, + "id": 12, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "i51CxikVz" + }, + "refId": "A" + } + ], + "title": "System metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-YlBl" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 53 + }, + "id": 4, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "rate(process_cpu_seconds_total{job=\"meilisearch\", instance=\"$instance\"}[1m])", + "interval": "", + "legendFormat": "process", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus" + }, + "exemplar": true, + "expr": "sum(rate(container_cpu_usage_seconds_total{name='mongodb-redis'}[1m])) by (name)", + "interval": "", + "legendFormat": "container", + "refId": "B" + } + ], + "title": "CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-YlBl" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "MiB", + 
"axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 53 + }, + "id": 5, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "process_resident_memory_bytes{job=\"meilisearch\", instance=\"$instance\"} / 1024 / 1024", + "interval": "", + "legendFormat": "process", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus" + }, + "exemplar": true, + "expr": "container_memory_usage_bytes{name=\"mongodb-redis\"} / 1024 / 1024", + "interval": "", + "legendFormat": "container", + "refId": "B" + } + ], + "title": "Memory usage", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "localhost:7700", + "value": "localhost:7700" + }, + "datasource": { + "type": "prometheus", + "uid": "bb3298a4-9acf-4da1-b86a-813f29f50888" + }, + "definition": "label_values(instance)", + "hide": 0, + "includeAll": false, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [], + "query": { + "query": "label_values(instance)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "mieli", + "value": "mieli" + }, + "datasource": { + "type": "prometheus", + "uid": "bb3298a4-9acf-4da1-b86a-813f29f50888" + }, + "definition": "label_values(index)", + "hide": 0, + "includeAll": false, + "label": "index", + "multi": false, + "name": "Index", + "options": [], + "query": { + "query": "label_values(index)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m" + ] + }, + "timezone": "", + "title": "Meilisearch", + "uid": "7wcZ94dnz", + "version": 6, + "weekStart": "" +} \ No newline at end of file diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index af20ba1ae..a05be8afb 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -31,7 +31,7 @@ mod uuid_codec; pub type Result = std::result::Result; pub type TaskId = u32; -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use std::ops::{Bound, RangeBounds}; use std::path::{Path, PathBuf}; use std::sync::atomic::AtomicBool; @@ -737,6 +737,34 @@ impl IndexScheduler { Ok(tasks) } + pub fn get_stats(&self) -> Result>> { + let rtxn = self.read_txn()?; + + let mut res = BTreeMap::new(); + + res.insert( 
+ "statuses".to_string(), + enum_iterator::all::() + .map(|s| Ok((s.to_string(), self.get_status(&rtxn, s)?.len()))) + .collect::>>()?, + ); + res.insert( + "types".to_string(), + enum_iterator::all::() + .map(|s| Ok((s.to_string(), self.get_kind(&rtxn, s)?.len()))) + .collect::>>()?, + ); + res.insert( + "indexes".to_string(), + self.index_tasks + .iter(&rtxn)? + .map(|res| Ok(res.map(|(name, bitmap)| (name.to_string(), bitmap.len()))?)) + .collect::>>()?, + ); + + Ok(res) + } + /// Return true iff there is at least one task associated with this index /// that is processing. pub fn is_index_processing(&self, index: &str) -> Result { diff --git a/meilisearch/src/metrics.rs b/meilisearch/src/metrics.rs index 7ee5241db..4982e19b7 100644 --- a/meilisearch/src/metrics.rs +++ b/meilisearch/src/metrics.rs @@ -40,4 +40,7 @@ lazy_static! { HTTP_RESPONSE_TIME_CUSTOM_BUCKETS.to_vec() ) .expect("Can't create a metric"); + pub static ref NB_TASKS: IntGaugeVec = + register_int_gauge_vec!(opts!("nb_tasks", "Number of tasks"), &["kind", "value"]) + .expect("Can't create a metric"); } diff --git a/meilisearch/src/routes/metrics.rs b/meilisearch/src/routes/metrics.rs index 83cd50542..d3cee4de5 100644 --- a/meilisearch/src/routes/metrics.rs +++ b/meilisearch/src/routes/metrics.rs @@ -39,6 +39,12 @@ pub async fn get_metrics( .set(value.number_of_documents as i64); } + for (kind, value) in index_scheduler.get_stats()? { + for (value, count) in value { + crate::metrics::NB_TASKS.with_label_values(&[&kind, &value]).set(count as i64); + } + } + let encoder = TextEncoder::new(); let mut buffer = vec![]; encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics"); From 35d5556f1f08d762f9cbc9e8b25375b3a937221c Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 25 May 2023 17:41:53 +0200 Subject: [PATCH 42/73] prefix all the metrics by meilisearch_ --- grafana-dashboards/dashboard.json | 108 +++++++++++------------------- meilisearch/src/metrics.rs | 14 ++-- meilisearch/src/middleware.rs | 4 +- meilisearch/src/routes/metrics.rs | 4 +- 4 files changed, 51 insertions(+), 79 deletions(-) diff --git a/grafana-dashboards/dashboard.json b/grafana-dashboards/dashboard.json index 015831173..ec5d8530b 100644 --- a/grafana-dashboards/dashboard.json +++ b/grafana-dashboards/dashboard.json @@ -24,7 +24,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 4, + "id": 2, "links": [], "liveNow": false, "panels": [ @@ -59,20 +59,17 @@ "fieldConfig": { "defaults": { "color": { - "mode": "continuous-YlBl" + "mode": "continuous-GrYlRd" }, "decimals": 0, "mappings": [], + "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null - }, - { - "color": "red", - "value": 80 } ] }, @@ -89,6 +86,9 @@ "id": 2, "interval": "5s", "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ @@ -97,9 +97,7 @@ "fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} + "textMode": "auto" }, "pluginVersion": "9.5.2", "targets": [ @@ -117,7 +115,7 @@ } ], "title": "Database Size", - "type": "gauge" + "type": "stat" }, { "datasource": { @@ -243,7 +241,8 @@ }, { "datasource": { - "type": "prometheus" + "type": "prometheus", + "uid": "c4085c47-f6d3-45dd-b761-6809055bb749" }, "fieldConfig": { "defaults": { @@ -295,7 +294,7 @@ }, "editorMode": "builder", "exemplar": true, - "expr": "round(increase(http_requests_total{method=\"POST\", 
path=\"/indexes/$Index/search\", job=\"meilisearch\"}[1h]))", + "expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[1h]))", "interval": "", "legendFormat": "", "range": true, @@ -307,7 +306,8 @@ }, { "datasource": { - "type": "prometheus" + "type": "prometheus", + "uid": "c4085c47-f6d3-45dd-b761-6809055bb749" }, "fieldConfig": { "defaults": { @@ -355,7 +355,7 @@ }, "editorMode": "builder", "exemplar": true, - "expr": "round(increase(http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[24h]))", + "expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[24h]))", "interval": "", "legendFormat": "", "range": true, @@ -367,7 +367,8 @@ }, { "datasource": { - "type": "prometheus" + "type": "prometheus", + "uid": "c4085c47-f6d3-45dd-b761-6809055bb749" }, "fieldConfig": { "defaults": { @@ -415,7 +416,7 @@ }, "editorMode": "builder", "exemplar": true, - "expr": "round(increase(http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[30d]))", + "expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[30d]))", "interval": "", "legendFormat": "", "range": true, @@ -427,7 +428,8 @@ }, { "datasource": { - "type": "prometheus" + "type": "prometheus", + "uid": "c4085c47-f6d3-45dd-b761-6809055bb749" }, "fieldConfig": { "defaults": { @@ -509,7 +511,7 @@ }, "editorMode": "builder", "exemplar": true, - "expr": "rate(http_requests_total{instance=\"$instance\", job=\"meilisearch\"}[5m])", + "expr": "rate(meilisearch_http_requests_total{instance=\"$instance\", job=\"meilisearch\"}[5m])", "interval": "", "legendFormat": "{{method}} {{path}}", "range": true, @@ -643,7 +645,7 @@ "overrides": [] }, "gridPos": { - "h": 12, + "h": 11, "w": 24, "x": 0, "y": 18 @@ -664,9 +666,9 @@ "exponent": 0.5, "fill": "#b4ff00", "mode": "scheme", - "reverse": false, + "reverse": true, "scale": "exponential", - "scheme": "Blues", + "scheme": "RdYlBu", "steps": 128 }, "exemplars": { @@ -688,7 +690,7 @@ }, "yAxis": { "axisPlacement": "left", - "decimals": 2, + "decimals": 0, "reverse": false, "unit": "s" } @@ -730,10 +732,7 @@ "yBucketNumber": 10 }, { - "datasource": { - "type": "prometheus", - "uid": "bb3298a4-9acf-4da1-b86a-813f29f50888" - }, + "datasource": {}, "fieldConfig": { "defaults": { "color": { @@ -785,38 +784,13 @@ }, "unit": "none" }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "succeeded " - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 11, "w": 12, "x": 0, - "y": 30 + "y": 29 }, "id": 23, "interval": "5s", @@ -840,7 +814,7 @@ }, "editorMode": "builder", "exemplar": true, - "expr": "nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"statuses\"}", + "expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"statuses\"}", "interval": "", "legendFormat": "{{value}} ", "range": true, @@ -851,10 +825,7 @@ "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "bb3298a4-9acf-4da1-b86a-813f29f50888" - }, + "datasource": {}, "fieldConfig": { "defaults": { "color": { @@ -912,7 +883,7 @@ "h": 11, "w": 12, "x": 12, - 
"y": 30 + "y": 29 }, "id": 24, "interval": "5s", @@ -936,7 +907,7 @@ }, "editorMode": "builder", "exemplar": true, - "expr": "nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"types\"}", + "expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"types\"}", "interval": "", "legendFormat": "{{value}} ", "range": true, @@ -947,10 +918,7 @@ "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "bb3298a4-9acf-4da1-b86a-813f29f50888" - }, + "datasource": {}, "fieldConfig": { "defaults": { "color": { @@ -1008,7 +976,7 @@ "h": 11, "w": 12, "x": 0, - "y": 41 + "y": 40 }, "id": 25, "interval": "5s", @@ -1032,7 +1000,7 @@ }, "editorMode": "builder", "exemplar": true, - "expr": "nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"indexes\"}", + "expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"indexes\"}", "interval": "", "legendFormat": "{{value}} ", "range": true, @@ -1052,7 +1020,7 @@ "h": 1, "w": 24, "x": 0, - "y": 52 + "y": 51 }, "id": 12, "panels": [], @@ -1130,7 +1098,7 @@ "h": 11, "w": 12, "x": 0, - "y": 53 + "y": 52 }, "id": 4, "interval": "5s", @@ -1233,7 +1201,7 @@ "h": 11, "w": 12, "x": 12, - "y": 53 + "y": 52 }, "id": 5, "interval": "5s", @@ -1355,6 +1323,6 @@ "timezone": "", "title": "Meilisearch", "uid": "7wcZ94dnz", - "version": 6, + "version": 3, "weekStart": "" } \ No newline at end of file diff --git a/meilisearch/src/metrics.rs b/meilisearch/src/metrics.rs index 4982e19b7..07c7706bf 100644 --- a/meilisearch/src/metrics.rs +++ b/meilisearch/src/metrics.rs @@ -17,8 +17,8 @@ fn create_buckets() -> [f64; 29] { lazy_static! { pub static ref HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: [f64; 29] = create_buckets(); - pub static ref HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!( - opts!("http_requests_total", "HTTP requests total"), + pub static ref MEILISEARCH_HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!( + opts!("meilisearch_http_requests_total", "Meilisearch HTTP requests total"), &["method", "path"] ) .expect("Can't create a metric"); @@ -33,14 +33,16 @@ lazy_static! 
{ &["index"] ) .expect("Can't create a metric"); - pub static ref HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!( + pub static ref MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!( "http_response_time_seconds", "HTTP response times", &["method", "path"], HTTP_RESPONSE_TIME_CUSTOM_BUCKETS.to_vec() ) .expect("Can't create a metric"); - pub static ref NB_TASKS: IntGaugeVec = - register_int_gauge_vec!(opts!("nb_tasks", "Number of tasks"), &["kind", "value"]) - .expect("Can't create a metric"); + pub static ref MEILISEARCH_NB_TASKS: IntGaugeVec = register_int_gauge_vec!( + opts!("meilisearch_nb_tasks", "Meilisearch Number of tasks"), + &["kind", "value"] + ) + .expect("Can't create a metric"); } diff --git a/meilisearch/src/middleware.rs b/meilisearch/src/middleware.rs index 080a52634..a8c981dca 100644 --- a/meilisearch/src/middleware.rs +++ b/meilisearch/src/middleware.rs @@ -52,11 +52,11 @@ where if is_registered_resource { let request_method = req.method().to_string(); histogram_timer = Some( - crate::metrics::HTTP_RESPONSE_TIME_SECONDS + crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS .with_label_values(&[&request_method, request_path]) .start_timer(), ); - crate::metrics::HTTP_REQUESTS_TOTAL + crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL .with_label_values(&[&request_method, request_path]) .inc(); } diff --git a/meilisearch/src/routes/metrics.rs b/meilisearch/src/routes/metrics.rs index d3cee4de5..3c9a52e26 100644 --- a/meilisearch/src/routes/metrics.rs +++ b/meilisearch/src/routes/metrics.rs @@ -41,7 +41,9 @@ pub async fn get_metrics( for (kind, value) in index_scheduler.get_stats()? { for (value, count) in value { - crate::metrics::NB_TASKS.with_label_values(&[&kind, &value]).set(count as i64); + crate::metrics::MEILISEARCH_NB_TASKS + .with_label_values(&[&kind, &value]) + .set(count as i64); } } From c9b65677bf153571df274cf959719311821bfc8c Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 25 May 2023 18:30:30 +0200 Subject: [PATCH 43/73] return the on disk size actually used by meilisearch --- grafana-dashboards/dashboard.json | 219 ++++++++++++++---------- index-scheduler/src/index_mapper/mod.rs | 6 +- index-scheduler/src/lib.rs | 6 + meilisearch-auth/src/lib.rs | 5 + meilisearch-auth/src/store.rs | 5 + meilisearch/src/metrics.rs | 7 +- meilisearch/src/routes/metrics.rs | 1 + meilisearch/src/routes/mod.rs | 12 +- 8 files changed, 169 insertions(+), 92 deletions(-) diff --git a/grafana-dashboards/dashboard.json b/grafana-dashboards/dashboard.json index ec5d8530b..b546b4969 100644 --- a/grafana-dashboards/dashboard.json +++ b/grafana-dashboards/dashboard.json @@ -52,71 +52,6 @@ "title": "Web application metrics", "type": "row" }, - { - "datasource": { - "type": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-GrYlRd" - }, - "decimals": 0, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 0, - "y": 1 - }, - "id": 2, - "interval": "5s", - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.5.2", - "targets": [ - { - "datasource": { - "type": "prometheus" - }, - "editorMode": "builder", - "exemplar": true, - "expr": 
"meilisearch_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}", - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Database Size", - "type": "stat" - }, { "datasource": { "type": "prometheus" @@ -145,7 +80,7 @@ "gridPos": { "h": 6, "w": 4, - "x": 4, + "x": 0, "y": 1 }, "id": 22, @@ -206,7 +141,7 @@ "gridPos": { "h": 6, "w": 4, - "x": 8, + "x": 4, "y": 1 }, "id": 18, @@ -431,10 +366,11 @@ "type": "prometheus", "uid": "c4085c47-f6d3-45dd-b761-6809055bb749" }, + "description": "", "fieldConfig": { "defaults": { "color": { - "mode": "continuous-YlBl" + "mode": "palette-classic" }, "custom": { "axisCenteredZero": false, @@ -443,7 +379,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 15, + "fillOpacity": 25, "gradientMode": "none", "hideFrom": { "legend": false, @@ -456,18 +392,18 @@ "scaleDistribution": { "type": "linear" }, - "showPoints": "never", + "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "decimals": 2, "mappings": [], + "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -480,7 +416,8 @@ "value": 80 } ] - } + }, + "unit": "bytes" }, "overrides": [] }, @@ -490,13 +427,13 @@ "x": 0, "y": 7 }, - "id": 1, + "id": 2, "interval": "5s", "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "right", + "placement": "bottom", "showLegend": true }, "tooltip": { @@ -504,6 +441,7 @@ "sort": "none" } }, + "pluginVersion": "9.5.2", "targets": [ { "datasource": { @@ -511,14 +449,26 @@ }, "editorMode": "builder", "exemplar": true, - "expr": "rate(meilisearch_http_requests_total{instance=\"$instance\", job=\"meilisearch\"}[5m])", + "expr": "meilisearch_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}", "interval": "", - "legendFormat": "{{method}} {{path}}", + "legendFormat": "Database size on disk", "range": true, - "refId": "A" + "refId": "DB Size on disk" + }, + { + "datasource": { + "type": "prometheus", + "uid": "c4085c47-f6d3-45dd-b761-6809055bb749" + }, + "editorMode": "builder", + "expr": "meilisearch_used_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}", + "hide": false, + "legendFormat": "Used bytes", + "range": true, + "refId": "Actual used bytes" } ], - "title": "HTTP requests per second (All Indexes)", + "title": "Database Size in bytes", "type": "timeseries" }, { @@ -616,6 +566,101 @@ "title": "Mean response time (All Indexes)", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "c4085c47-f6d3-45dd-b761-6809055bb749" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-YlBl" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 1, + 
"interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "rate(meilisearch_http_requests_total{instance=\"$instance\", job=\"meilisearch\"}[5m])", + "interval": "", + "legendFormat": "{{method}} {{path}}", + "range": true, + "refId": "A" + } + ], + "title": "HTTP requests per second (All Indexes)", + "type": "timeseries" + }, { "cards": {}, "color": { @@ -648,7 +693,7 @@ "h": 11, "w": 24, "x": 0, - "y": 18 + "y": 29 }, "heatmap": {}, "hideZeroBuckets": false, @@ -790,7 +835,7 @@ "h": 11, "w": 12, "x": 0, - "y": 29 + "y": 40 }, "id": 23, "interval": "5s", @@ -883,7 +928,7 @@ "h": 11, "w": 12, "x": 12, - "y": 29 + "y": 40 }, "id": 24, "interval": "5s", @@ -976,7 +1021,7 @@ "h": 11, "w": 12, "x": 0, - "y": 40 + "y": 51 }, "id": 25, "interval": "5s", @@ -1020,7 +1065,7 @@ "h": 1, "w": 24, "x": 0, - "y": 51 + "y": 62 }, "id": 12, "panels": [], @@ -1098,7 +1143,7 @@ "h": 11, "w": 12, "x": 0, - "y": 52 + "y": 63 }, "id": 4, "interval": "5s", @@ -1201,7 +1246,7 @@ "h": 11, "w": 12, "x": 12, - "y": 52 + "y": 63 }, "id": 5, "interval": "5s", @@ -1323,6 +1368,6 @@ "timezone": "", "title": "Meilisearch", "uid": "7wcZ94dnz", - "version": 3, + "version": 4, "weekStart": "" } \ No newline at end of file diff --git a/index-scheduler/src/index_mapper/mod.rs b/index-scheduler/src/index_mapper/mod.rs index 2bf6f46ad..86bec2927 100644 --- a/index-scheduler/src/index_mapper/mod.rs +++ b/index-scheduler/src/index_mapper/mod.rs @@ -90,6 +90,8 @@ pub struct IndexStats { pub number_of_documents: u64, /// Size of the index' DB, in bytes. pub database_size: u64, + /// Size of the index' DB, in bytes. + pub used_database_size: u64, /// Association of every field name with the number of times it occurs in the documents. pub field_distribution: FieldDistribution, /// Creation date of the index. @@ -105,10 +107,10 @@ impl IndexStats { /// /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`. pub fn new(index: &Index, rtxn: &RoTxn) -> Result { - let database_size = index.on_disk_size()?; Ok(IndexStats { number_of_documents: index.number_of_documents(rtxn)?, - database_size, + database_size: index.on_disk_size()?, + used_database_size: index.used_size()?, field_distribution: index.field_distribution(rtxn)?, created_at: index.created_at(rtxn)?, updated_at: index.updated_at(rtxn)?, diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index a05be8afb..c27546c8e 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -554,10 +554,16 @@ impl IndexScheduler { &self.index_mapper.indexer_config } + /// Return the real database size (i.e.: The size **with** the free pages) pub fn size(&self) -> Result { Ok(self.env.real_disk_size()?) } + /// Return the used database size (i.e.: The size **without** the free pages) + pub fn used_size(&self) -> Result { + Ok(self.env.non_free_pages_size()?) + } + /// Return the index corresponding to the name. /// /// * If the index wasn't opened before, the index will be opened. diff --git a/meilisearch-auth/src/lib.rs b/meilisearch-auth/src/lib.rs index 2a02776bd..e74f1707c 100644 --- a/meilisearch-auth/src/lib.rs +++ b/meilisearch-auth/src/lib.rs @@ -45,6 +45,11 @@ impl AuthController { self.store.size() } + /// Return the used size of the `AuthController` database in bytes. 
+ pub fn used_size(&self) -> Result { + self.store.used_size() + } + pub fn create_key(&self, create_key: CreateApiKey) -> Result { match self.store.get_api_key(create_key.uid)? { Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(create_key.uid.to_string())), diff --git a/meilisearch-auth/src/store.rs b/meilisearch-auth/src/store.rs index 5c2776154..eb93f5a46 100644 --- a/meilisearch-auth/src/store.rs +++ b/meilisearch-auth/src/store.rs @@ -73,6 +73,11 @@ impl HeedAuthStore { Ok(self.env.real_disk_size()?) } + /// Return the number of bytes actually used in the database + pub fn used_size(&self) -> Result { + Ok(self.env.non_free_pages_size()?) + } + pub fn set_drop_on_close(&mut self, v: bool) { self.should_close_on_drop = v; } diff --git a/meilisearch/src/metrics.rs b/meilisearch/src/metrics.rs index 07c7706bf..79332f360 100644 --- a/meilisearch/src/metrics.rs +++ b/meilisearch/src/metrics.rs @@ -23,8 +23,13 @@ lazy_static! { ) .expect("Can't create a metric"); pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge = - register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch Db Size In Bytes")) + register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes")) .expect("Can't create a metric"); + pub static ref MEILISEARCH_USED_DB_SIZE_BYTES: IntGauge = register_int_gauge!(opts!( + "meilisearch_used_db_size_bytes", + "Meilisearch Used DB Size In Bytes" + )) + .expect("Can't create a metric"); pub static ref MEILISEARCH_INDEX_COUNT: IntGauge = register_int_gauge!(opts!("meilisearch_index_count", "Meilisearch Index Count")) .expect("Can't create a metric"); diff --git a/meilisearch/src/routes/metrics.rs b/meilisearch/src/routes/metrics.rs index 3c9a52e26..a7d41e33e 100644 --- a/meilisearch/src/routes/metrics.rs +++ b/meilisearch/src/routes/metrics.rs @@ -31,6 +31,7 @@ pub async fn get_metrics( let response = create_all_stats((*index_scheduler).clone(), auth_controller, auth_filters)?; crate::metrics::MEILISEARCH_DB_SIZE_BYTES.set(response.database_size as i64); + crate::metrics::MEILISEARCH_USED_DB_SIZE_BYTES.set(response.used_database_size as i64); crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64); for (index, value) in response.indexes.iter() { diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index 51340ac1b..57d670b5f 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -231,6 +231,8 @@ pub async fn running() -> HttpResponse { #[serde(rename_all = "camelCase")] pub struct Stats { pub database_size: u64, + #[serde(skip)] + pub used_database_size: u64, #[serde(serialize_with = "time::serde::rfc3339::option::serialize")] pub last_update: Option, pub indexes: BTreeMap, @@ -259,6 +261,7 @@ pub fn create_all_stats( let mut last_task: Option = None; let mut indexes = BTreeMap::new(); let mut database_size = 0; + let mut used_database_size = 0; for index_uid in index_scheduler.index_names()? { // Accumulate the size of all indexes, even unauthorized ones, so @@ -266,6 +269,7 @@ pub fn create_all_stats( // See for context. 
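
// Note: `used_database_size` is marked #[serde(skip)] in the Stats struct
// above, so it never appears in the /stats HTTP response; it is surfaced
// only through the meilisearch_used_db_size_bytes Prometheus gauge.
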
let stats = index_scheduler.index_stats(&index_uid)?; database_size += stats.inner_stats.database_size; + used_database_size += stats.inner_stats.used_database_size; if !filters.is_index_authorized(&index_uid) { continue; @@ -278,10 +282,14 @@ pub fn create_all_stats( } database_size += index_scheduler.size()?; + used_database_size += index_scheduler.used_size()?; database_size += auth_controller.size()?; - database_size += index_scheduler.compute_update_file_size()?; + used_database_size += auth_controller.used_size()?; + let update_file_size = index_scheduler.compute_update_file_size()?; + database_size += update_file_size; + used_database_size += update_file_size; - let stats = Stats { database_size, last_update: last_task, indexes }; + let stats = Stats { database_size, used_database_size, last_update: last_task, indexes }; Ok(stats) } From 51dce9e9d1f6f7a5a67174dd658ac39604737121 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 25 May 2023 18:33:01 +0200 Subject: [PATCH 44/73] improve the dashboard slightly --- grafana-dashboards/dashboard.json | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/grafana-dashboards/dashboard.json b/grafana-dashboards/dashboard.json index b546b4969..19355650b 100644 --- a/grafana-dashboards/dashboard.json +++ b/grafana-dashboards/dashboard.json @@ -387,6 +387,9 @@ "viz": false }, "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -396,7 +399,7 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "normal" + "mode": "none" }, "thresholdsStyle": { "mode": "off" @@ -1368,6 +1371,6 @@ "timezone": "", "title": "Meilisearch", "uid": "7wcZ94dnz", - "version": 4, + "version": 5, "weekStart": "" } \ No newline at end of file From 73198179f1d0cca0dca93aa4ee6e8a56cf6da1c7 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 29 May 2023 10:08:27 +0200 Subject: [PATCH 45/73] Consistently use wrapping add to avoid overflow in debug when query starts with a separator --- milli/src/search/new/query_term/parse_query.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs index bf90748e4..0120772be 100644 --- a/milli/src/search/new/query_term/parse_query.rs +++ b/milli/src/search/new/query_term/parse_query.rs @@ -77,13 +77,9 @@ pub fn located_query_terms_from_tokens( } } TokenKind::Separator(separator_kind) => { - match separator_kind { - SeparatorKind::Hard => { - position += 1; - } - SeparatorKind::Soft => { - position += 0; - } + // add penalty for hard separators + if let SeparatorKind::Hard = separator_kind { + position = position.wrapping_add(1); } phrase = 'phrase: { From 1dfc4038abccf082f4961ac91c5faf77f546f671 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 29 May 2023 11:58:26 +0200 Subject: [PATCH 46/73] Add test that fails before PR and passes now --- .../src/search/new/query_term/parse_query.rs | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs index 0120772be..69c2cd9c9 100644 --- a/milli/src/search/new/query_term/parse_query.rs +++ b/milli/src/search/new/query_term/parse_query.rs @@ -284,3 +284,36 @@ impl PhraseBuilder { }) } } + +#[cfg(test)] +mod tests { + use charabia::TokenizerBuilder; + + use super::*; + use crate::index::tests::TempIndex; + + fn temp_index_with_documents() -> TempIndex { + let temp_index = 
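
// Aside on the wrapping_add change above: `position` is an unsigned counter,
// and when the very first token is a hard separator it can still hold a
// sentinel at the top of its range (an assumption about the exact initial
// value). In debug builds a plain `position += 1` then aborts:
//
//     let position: u16 = u16::MAX;
//     // let next = position + 1;          // panics: attempt to add with overflow
//     let next = position.wrapping_add(1); // wraps to 0, panic-free
//     assert_eq!(next, 0);
//
// which is exactly what the `start_with_hard_separator` test below exercises
// with a lone "." query.
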
TempIndex::new(); + temp_index + .add_documents(documents!([ + { "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" }, + { "id": 2, "name": "Westfália" }, + { "id": 3, "name": "Ŵôřlḑôle" }, + ])) + .unwrap(); + temp_index + } + + #[test] + fn start_with_hard_separator() -> Result<()> { + let tokenizer = TokenizerBuilder::new().build(); + let tokens = tokenizer.tokenize("."); + let index = temp_index_with_documents(); + let rtxn = index.read_txn()?; + let mut ctx = SearchContext::new(&index, &rtxn); + // panics with `attempt to add with overflow` before + let located_query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None)?; + assert!(located_query_terms.is_empty()); + Ok(()) + } +} From 6c6387d05e1fe8a700b274e1703c621cef09f2b4 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 29 May 2023 12:27:39 +0200 Subject: [PATCH 47/73] move the fuzzer to its own crate --- Cargo.lock | 708 ++++++++++++++---------------- Cargo.toml | 3 +- fuzzers/Cargo.toml | 20 + fuzzers/README.md | 3 + fuzzers/src/bin/fuzz.rs | 136 ++++++ fuzzers/src/lib.rs | 46 ++ milli/Cargo.toml | 7 - milli/{examples => tests}/fuzz.rs | 4 +- 8 files changed, 531 insertions(+), 396 deletions(-) create mode 100644 fuzzers/Cargo.toml create mode 100644 fuzzers/README.md create mode 100644 fuzzers/src/bin/fuzz.rs create mode 100644 fuzzers/src/lib.rs rename milli/{examples => tests}/fuzz.rs (99%) diff --git a/Cargo.lock b/Cargo.lock index a432908a2..83950615a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,19 +4,19 @@ version = 3 [[package]] name = "actix-codec" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a7559404a7f3573127aab53c08ce37a6c6a315c374a31070f3c91cd1b4a7fe" +checksum = "617a8268e3537fe1d8c9ead925fca49ef6400927ee7bc26750e90ecee14ce4b8" dependencies = [ "bitflags", "bytes", "futures-core", "futures-sink", - "log", "memchr", "pin-project-lite", "tokio", "tokio-util", + "tracing", ] [[package]] @@ -46,7 +46,7 @@ dependencies = [ "actix-tls", "actix-utils", "ahash 0.8.3", - "base64 0.21.0", + "base64 0.21.2", "bitflags", "brotli", "bytes", @@ -110,9 +110,9 @@ dependencies = [ [[package]] name = "actix-server" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0da34f8e659ea1b077bb4637948b815cd3768ad5a188fdcd74ff4d84240cd824" +checksum = "3e8613a75dd50cc45f473cee3c34d59ed677c0f7b44480ce3b8247d7dc519327" dependencies = [ "actix-rt", "actix-service", @@ -150,7 +150,7 @@ dependencies = [ "futures-core", "log", "pin-project-lite", - "tokio-rustls", + "tokio-rustls 0.23.4", "tokio-util", "webpki-roots", ] @@ -247,14 +247,13 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "aes" -version = "0.7.5" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8" +checksum = "433cfd6710c9986c576a25ca913c39d66a6474107b406f34f91d4a8923395241" dependencies = [ "cfg-if", "cipher", "cpufeatures", - "opaque-debug", ] [[package]] @@ -282,9 +281,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "0.7.20" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04" dependencies = [ "memchr", ] @@ -312,49 +311,58 @@ checksum = 
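
The new `fuzzers` crate only has its manifest and lockfile entries shown in this patch, so the following is an illustrative sketch, not the actual contents of fuzzers/src/bin/fuzz.rs: an `arbitrary`-driven harness (the lockfile lists `arbitrary`, with its derive macro, among the crate's dependencies). The `Operation` type and `fuzz_one` function are hypothetical:

    use arbitrary::{Arbitrary, Unstructured};

    // Hypothetical operations a fuzz run could replay against a test index.
    #[derive(Arbitrary, Debug)]
    enum Operation {
        AddDocument { id: u32, name: String },
        DeleteDocument { id: u32 },
    }

    fn fuzz_one(raw: &[u8]) -> arbitrary::Result<()> {
        let mut unstructured = Unstructured::new(raw);
        // Decode an arbitrary batch of operations from the raw fuzz input...
        let batch = Vec::<Operation>::arbitrary(&mut unstructured)?;
        for op in batch {
            // ...and replay each one against a temporary index, checking
            // invariants along the way (elided here).
            let _ = op;
        }
        Ok(())
    }
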
"4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.2.6" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "342258dd14006105c2b75ab1bd7543a03bdf0cfc94383303ac212a04939dff6f" +checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" dependencies = [ "anstyle", "anstyle-parse", + "anstyle-query", "anstyle-wincon", - "concolor-override", - "concolor-query", + "colorchoice", "is-terminal", "utf8parse", ] [[package]] name = "anstyle" -version = "0.3.5" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23ea9e81bd02e310c216d080f6223c179012256e5151c41db88d12c88a1684d2" +checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d" [[package]] name = "anstyle-parse" -version = "0.1.1" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7d1bb534e9efed14f3e5f44e7dd1a4f709384023a4165199a4241e18dff0116" +checksum = "e765fd216e48e067936442276d1d57399e37bce53c264d6fefbe298080cb57ee" dependencies = [ "utf8parse", ] [[package]] -name = "anstyle-wincon" -version = "0.2.0" +name = "anstyle-query" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3127af6145b149f3287bb9a0d10ad9c5692dba8c53ad48285e5bec4063834fa" +checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "anstyle-wincon" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" dependencies = [ "anstyle", - "windows-sys 0.45.0", + "windows-sys 0.48.0", ] [[package]] name = "anyhow" -version = "1.0.70" +version = "1.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4" +checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" dependencies = [ "backtrace", ] @@ -397,7 +405,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] @@ -408,7 +416,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] @@ -447,8 +455,8 @@ dependencies = [ "cc", "cfg-if", "libc", - "miniz_oxide", - "object 0.30.2", + "miniz_oxide 0.6.2", + "object", "rustc-demangle", ] @@ -460,15 +468,15 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" [[package]] name = "base64" -version = "0.21.0" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" +checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d" [[package]] name = "base64ct" -version = "1.5.3" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b645a089122eccb6111b4f81cbc1a49f5900ac4666bb93ac027feaecf15607bf" +checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] name = "benchmarks" @@ -513,12 +521,6 @@ dependencies = [ "serde", ] -[[package]] -name = "bit-vec" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" - [[package]] name = "bitflags" version = "1.3.2" @@ -557,9 +559,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09" +checksum = "a246e68bb43f6cd9db24bea052a53e40405417c5fb372e3d1a8a7f770a564ef5" dependencies = [ "memchr", "once_cell", @@ -569,9 +571,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.11.1" +version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" +checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" [[package]] name = "byte-unit" @@ -606,7 +608,7 @@ checksum = "fdde5c9cd29ebd706ce1b35600920a33550e402fc998a2e53ad3b42c3c47a192" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] @@ -623,9 +625,9 @@ checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" [[package]] name = "bytestring" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7f83e57d9154148e355404702e2694463241880b939570d7c97c014da7a69a1" +checksum = "238e4886760d98c4f899360c834fa93e62cf7f721ac3c2da375cbdf4b8679aae" dependencies = [ "bytes", ] @@ -727,9 +729,9 @@ dependencies = [ [[package]] name = "ciborium" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0c137568cc60b904a7724001b35ce2630fd00d5d84805fbb608ab89509d788f" +checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" dependencies = [ "ciborium-io", "ciborium-ll", @@ -738,15 +740,15 @@ dependencies = [ [[package]] name = "ciborium-io" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "346de753af073cc87b52b2083a506b38ac176a44cfb05497b622e27be899b369" +checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" [[package]] name = "ciborium-ll" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "213030a2b5a4e0c0892b6652260cf6ccac84827b83a85a534e178e3906c4cf1b" +checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" dependencies = [ "ciborium-io", "half", @@ -754,18 +756,19 @@ dependencies = [ [[package]] name = "cipher" -version = "0.3.0" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" dependencies = [ - "generic-array", + "crypto-common", + "inout", ] [[package]] name = "clap" -version = "3.2.23" +version = "3.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" +checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" dependencies = [ "bitflags", "clap_lex 0.2.4", @@ -775,9 +778,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.2.1" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "046ae530c528f252094e4a77886ee1374437744b2bff1497aa898bbddbbb29b3" +checksum = "93aae7a4192245f70fe75dd9157fc7b4a5bf53e88d30bd4396f7d8f9284d5acc" 
dependencies = [ "clap_builder", "clap_derive", @@ -786,27 +789,27 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.2.1" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "223163f58c9a40c3b0a43e1c4b50a9ce09f007ea2cb1ec258a687945b4b7929f" +checksum = "4f423e341edefb78c9caba2d9c7f7687d0e72e89df3ce3394554754393ac3990" dependencies = [ "anstream", "anstyle", "bitflags", - "clap_lex 0.4.1", + "clap_lex 0.5.0", "strsim", ] [[package]] name = "clap_derive" -version = "4.2.0" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" +checksum = "191d9573962933b4027f932c600cd252ce27a8ad5979418fe78e43c07996f27b" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] @@ -820,9 +823,15 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.4.1" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" +checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" [[package]] name = "concat-arrays" @@ -835,31 +844,16 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "concolor-override" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a855d4a1978dc52fb0536a04d384c2c0c1aa273597f08b77c8c4d3b2eec6037f" - -[[package]] -name = "concolor-query" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d11d52c3d7ca2e6d0040212be9e4dbbcd78b6447f535b6b561f449427944cf" -dependencies = [ - "windows-sys 0.45.0", -] - [[package]] name = "console" -version = "0.15.5" +version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60" +checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" dependencies = [ "encode_unicode", "lazy_static", "libc", - "windows-sys 0.42.0", + "windows-sys 0.45.0", ] [[package]] @@ -914,9 +908,9 @@ checksum = "79bb3adfaf5f75d24b01aee375f7555907840fa2800e5ec8fa3b9e2031830173" [[package]] name = "cpufeatures" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" +checksum = "3e4c1eaa2012c47becbbad2ab175484c2a84d1185b566fb2cc5b8707343dfe58" dependencies = [ "libc", ] @@ -940,7 +934,7 @@ dependencies = [ "atty", "cast", "ciborium", - "clap 3.2.23", + "clap 3.2.25", "criterion-plot", "itertools", "lazy_static", @@ -1197,9 +1191,9 @@ checksum = "8c1bba4f227a4a53d12b653f50ca7bf10c9119ae2aba56aff9e0338b5c98f36a" [[package]] name = "digest" -version = "0.10.6" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", "crypto-common", @@ -1249,7 +1243,7 @@ dependencies = [ "tempfile", "thiserror", "time", - "uuid 1.3.1", + "uuid 1.3.3", ] [[package]] @@ -1351,22 +1345,22 @@ dependencies = [ [[package]] name = 
"enum-iterator" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "706d9e7cf1c7664859d79cd524e4e53ea2b67ea03c98cc2870c5e539695d597e" +checksum = "7add3873b5dd076766ee79c8e406ad1a472c385476b9e38849f8eec24f1be689" dependencies = [ "enum-iterator-derive", ] [[package]] name = "enum-iterator-derive" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "355f93763ef7b0ae1c43c4d8eccc9d5848d84ad1a1d8ce61c421d1ac85a19d05" +checksum = "eecf8589574ce9b895052fa12d69af7a233f99e6107f5cb8dd1044f2a17bfdcb" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.18", ] [[package]] @@ -1382,17 +1376,6 @@ dependencies = [ "termcolor", ] -[[package]] -name = "errno" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" -dependencies = [ - "errno-dragonfly", - "libc", - "winapi", -] - [[package]] name = "errno" version = "0.3.1" @@ -1453,19 +1436,19 @@ dependencies = [ "faux", "tempfile", "thiserror", - "uuid 1.3.1", + "uuid 1.3.3", ] [[package]] name = "filetime" -version = "0.2.19" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e884668cd0c7480504233e951174ddc3b382f7c2666e3b7310b5c4e7b0c37f9" +checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153" dependencies = [ "cfg-if", "libc", "redox_syscall 0.2.16", - "windows-sys 0.42.0", + "windows-sys 0.48.0", ] [[package]] @@ -1485,13 +1468,12 @@ checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6" [[package]] name = "flate2" -version = "1.0.25" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841" +checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" dependencies = [ "crc32fast", - "libz-sys", - "miniz_oxide", + "miniz_oxide 0.7.1", ] [[package]] @@ -1579,7 +1561,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] @@ -1613,47 +1595,16 @@ dependencies = [ ] [[package]] -name = "fuzzcheck" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee76e8096c3fcd82ab23177edddcc9b81b72c123caab54bb1e2dc19fd09d2dec" +name = "fuzzers" +version = "1.2.0" dependencies = [ - "ahash 0.7.6", - "bit-vec", - "cc", - "cfg-if", + "arbitrary", + "clap 4.3.0", "fastrand", - "flate2", - "fuzzcheck_common", - "fuzzcheck_mutators_derive", - "getopts", - "libc", - "md5", - "nu-ansi-term", - "object 0.27.1", - "regex-syntax", + "milli", "serde", "serde_json", -] - -[[package]] -name = "fuzzcheck_common" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dde06f8d25b14a35d43eb2d3dbace3b9193424460b10ad4ccf1b3d542d48f06f" -dependencies = [ - "getopts", -] - -[[package]] -name = "fuzzcheck_mutators_derive" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30ce01e8bbb3e7e0758dcf907fe799f5998a54368963f766ae94b84624ba60c8" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", + "tempfile", ] [[package]] @@ -1681,20 +1632,11 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"36d244a08113319b5ebcabad2b8b7925732d15eec46d7e7ac3c11734f3b7a6ad" -[[package]] -name = "getopts" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" -dependencies = [ - "unicode-width", -] - [[package]] name = "getrandom" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" dependencies = [ "cfg-if", "libc", @@ -1715,9 +1657,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.27.0" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dec7af912d60cdbd3677c1af9352ebae6fb8394d165568a2234df0fa00f87793" +checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4" [[package]] name = "git2" @@ -1751,9 +1693,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.17" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66b91535aa35fea1523ad1b86cb6b53c28e0ae566ba4a460f4457e936cad7c6f" +checksum = "d357c7ae988e7d2182f7d7871d0b963962420b0678b0997ce7de72001aeab782" dependencies = [ "bytes", "fnv", @@ -1926,9 +1868,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.25" +version = "0.14.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc5e554ff619822309ffd57d8734d77cd5ce6238bc956f037ea06c58238c9899" +checksum = "ab302d72a6f11a3b910431ff93aae7e773078c769f0a3ef15fb9ec692ed147d4" dependencies = [ "bytes", "futures-channel", @@ -1950,15 +1892,15 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.23.2" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1788965e61b367cd03a62950836d5cd41560c3577d90e40e0819373194d1661c" +checksum = "0646026eb1b3eea4cd9ba47912ea5ce9cc07713d105b1a14698f4e6433d348b7" dependencies = [ "http", "hyper", - "rustls", + "rustls 0.21.1", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.0", ] [[package]] @@ -2004,7 +1946,7 @@ dependencies = [ "tempfile", "thiserror", "time", - "uuid 1.3.1", + "uuid 1.3.3", ] [[package]] @@ -2018,6 +1960,15 @@ dependencies = [ "serde", ] +[[package]] +name = "inout" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" +dependencies = [ + "generic-array", +] + [[package]] name = "insta" version = "1.29.0" @@ -2045,9 +1996,9 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ "hermit-abi 0.3.1", "libc", @@ -2079,7 +2030,7 @@ checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" dependencies = [ "hermit-abi 0.3.1", "io-lifetimes", - "rustix 0.37.11", + "rustix 0.37.19", "windows-sys 0.48.0", ] @@ -2124,9 +2075,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.60" +version = "0.3.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" 
+checksum = "2f37a4a5928311ac501dee68b3c7613a1037d0edb30c8e5427bd832d55d1b790" dependencies = [ "wasm-bindgen", ] @@ -2145,7 +2096,7 @@ version = "8.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378" dependencies = [ - "base64 0.21.0", + "base64 0.21.2", "pem", "ring", "serde", @@ -2185,9 +2136,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.141" +version = "0.2.144" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" +checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" [[package]] name = "libgit2-sys" @@ -2203,15 +2154,15 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" +checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" [[package]] name = "libmimalloc-sys" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a558e3d911bc3c7bfc8c78bc580b404d6e51c1cefbf656e176a94b49b0df40" +checksum = "f4ac0e912c8ef1b735e92369695618dc5b1819f5a7bf3f167301a3ba1cea515e" dependencies = [ "cc", "libc", @@ -2219,9 +2170,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9702761c3935f8cc2f101793272e202c72b99da8f4224a19ddcf1279a6450bbf" +checksum = "56ee889ecc9568871456d42f603d6a0ce59ff328d291063a45cbdf0036baf6db" dependencies = [ "cc", "libc", @@ -2460,9 +2411,9 @@ checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" [[package]] name = "linux-raw-sys" -version = "0.3.1" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "lmdb-rkv-sys" @@ -2504,12 +2455,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.17" +version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "518ef76f2f87365916b142844c16d8fefd85039bc5699050210a7778ee1cd1de" [[package]] name = "logging_timer" @@ -2535,14 +2483,14 @@ dependencies = [ [[package]] name = "manifest-dir-macros" -version = "0.1.16" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f08150cf2bab1fc47c2196f4f41173a27fcd0f684165e5458c0046b53a472e2f" +checksum = "450e5ef583bc05177c4975b9ea907047091a9f62e74e81fcafb99dbffac51e7e" dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.18", ] [[package]] @@ -2586,7 +2534,7 @@ dependencies = [ "byte-unit", "bytes", "cargo_toml", - "clap 4.2.1", + "clap 4.3.0", "crossbeam-channel", "deserr", "dump", @@ -2625,7 +2573,7 @@ dependencies = [ "rayon", "regex", "reqwest", - "rustls", + "rustls 0.20.8", "rustls-pemfile", "segment", "serde", @@ -2647,7 +2595,7 @@ dependencies = [ "tokio-stream", "toml", "urlencoding", - "uuid 1.3.1", + "uuid 1.3.3", "vergen", "walkdir", "yaup", @@ -2658,7 +2606,7 @@ dependencies = [ name = "meilisearch-auth" version = "1.2.0" dependencies = [ - 
"base64 0.21.0", + "base64 0.21.2", "enum-iterator", "hmac", "maplit", @@ -2670,7 +2618,7 @@ dependencies = [ "sha2", "thiserror", "time", - "uuid 1.3.1", + "uuid 1.3.3", ] [[package]] @@ -2700,7 +2648,7 @@ dependencies = [ "thiserror", "time", "tokio", - "uuid 1.3.1", + "uuid 1.3.3", ] [[package]] @@ -2731,7 +2679,6 @@ dependencies = [ name = "milli" version = "1.2.0" dependencies = [ - "arbitrary", "big_s", "bimap", "bincode", @@ -2743,11 +2690,9 @@ dependencies = [ "csv", "deserr", "either", - "fastrand", "filter-parser", "flatten-serde-json", "fst", - "fuzzcheck", "fxhash", "geoutils", "grenad", @@ -2778,14 +2723,14 @@ dependencies = [ "tempfile", "thiserror", "time", - "uuid 1.3.1", + "uuid 1.3.3", ] [[package]] name = "mimalloc" -version = "0.1.36" +version = "0.1.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d88dad3f985ec267a3fcb7a1726f5cb1a7e8cad8b646e70a84f967210df23da" +checksum = "4e2894987a3459f3ffb755608bd82188f8ed00d0ae077f1edea29c068d639d98" dependencies = [ "libmimalloc-sys", ] @@ -2822,15 +2767,24 @@ dependencies = [ ] [[package]] -name = "mio" -version = "0.8.6" +name = "miniz_oxide" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "mio" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eebffdb73fe72e917997fad08bdbf31ac50b0fa91cec93e69a0662e4264d454c" dependencies = [ "libc", "log", "wasi", - "windows-sys 0.45.0", + "windows-sys 0.48.0", ] [[package]] @@ -2861,23 +2815,13 @@ dependencies = [ [[package]] name = "ntapi" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc51db7b362b205941f71232e56c625156eb9a929f8cf74a428fd5bc094a4afc" +checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" dependencies = [ "winapi", ] -[[package]] -name = "nu-ansi-term" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e62e2187cbceeafee9fb7b5e5e182623e0628ebf430a479df4487beb8f92fd7a" -dependencies = [ - "overload", - "winapi", -] - [[package]] name = "num-bigint" version = "0.4.3" @@ -2921,18 +2865,9 @@ dependencies = [ [[package]] name = "object" -version = "0.27.1" +version = "0.30.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67ac1d3f9a1d3616fd9a60c8d74296f22406a238b6a72f5cc1e6f314df4ffbf9" -dependencies = [ - "memchr", -] - -[[package]] -name = "object" -version = "0.30.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b8c786513eb403643f2a88c244c2aaa270ef2153f55094587d0c48a3cf22a83" +checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439" dependencies = [ "memchr", ] @@ -2955,17 +2890,11 @@ version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" -[[package]] -name = "opaque-debug" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" - [[package]] name = "ordered-float" -version = "3.6.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"13a384337e997e6860ffbaa83708b2ef329fd8c54cb67a5f64d421e0f943254f" +checksum = "2fc2dbde8f8a79f2102cc474ceb0ad68e3b80b85289ea62389b60e66777e4213" dependencies = [ "num-traits", ] @@ -2976,12 +2905,6 @@ version = "6.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267" -[[package]] -name = "overload" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" - [[package]] name = "page_size" version = "0.4.2" @@ -3071,9 +2994,9 @@ dependencies = [ [[package]] name = "pem" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c64931a1a212348ec4f3b4362585eca7159d0d09cbdf4a7f74f02173596fd4" +checksum = "a8835c273a76a90455d7344889b0964598e3316e2a79ede8e36f16bdcf2228b8" dependencies = [ "base64 0.13.1", ] @@ -3094,9 +3017,9 @@ dependencies = [ [[package]] name = "pest" -version = "2.5.7" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b1403e8401ad5dedea73c626b99758535b342502f8d1e361f4a2dd952749122" +checksum = "e68e84bfb01f0507134eac1e9b410a12ba379d064eab48c50ba4ce329a527b70" dependencies = [ "thiserror", "ucd-trie", @@ -3104,9 +3027,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.5.7" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be99c4c1d2fc2769b1d00239431d711d08f6efedcecb8b6e30707160aee99c15" +checksum = "6b79d4c71c865a25a4322296122e3924d30bc8ee0834c8bfc8b95f7f054afbfb" dependencies = [ "pest", "pest_generator", @@ -3114,22 +3037,22 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.5.7" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e56094789873daa36164de2e822b3888c6ae4b4f9da555a1103587658c805b1e" +checksum = "6c435bf1076437b851ebc8edc3a18442796b30f1728ffea6262d59bbe28b077e" dependencies = [ "pest", "pest_meta", "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] name = "pest_meta" -version = "2.5.7" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6733073c7cff3d8459fda0e42f13a047870242aed8b509fe98000928975f359e" +checksum = "745a452f8eb71e39ffd8ee32b3c5f51d03845f99786fa9b68db6ff509c505411" dependencies = [ "once_cell", "pest", @@ -3194,9 +3117,9 @@ checksum = "3bd12336e3afa34152e002f57df37a7056778daa59ea542b3473b87f5fb260c4" [[package]] name = "pkg-config" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" [[package]] name = "platform-dirs" @@ -3267,9 +3190,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.56" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b" dependencies = [ "unicode-ident", ] @@ -3284,7 +3207,7 @@ dependencies = [ "byteorder", "hex", "lazy_static", - "rustix 0.36.11", + "rustix 0.36.14", ] [[package]] @@ -3312,9 +3235,9 @@ checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" [[package]] name = "quote" -version = "1.0.26" 
+version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" dependencies = [ "proc-macro2", ] @@ -3402,9 +3325,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.7.3" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" +checksum = "81ca098a9821bd52d6b24fd8b10bd081f47d39c22778cafaa75a2857a62c6390" dependencies = [ "aho-corasick", "memchr", @@ -3419,17 +3342,17 @@ checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" [[package]] name = "regex-syntax" -version = "0.6.29" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" [[package]] name = "reqwest" -version = "0.11.16" +version = "0.11.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27b71749df584b7f4cac2c426c127a7c785a5106cc98f7a8feb044115f0fa254" +checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55" dependencies = [ - "base64 0.21.0", + "base64 0.21.2", "bytes", "encoding_rs", "futures-core", @@ -3446,13 +3369,13 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls", + "rustls 0.21.1", "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.0", "tower-service", "url", "wasm-bindgen", @@ -3509,9 +3432,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.21" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustc_version" @@ -3524,12 +3447,12 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.11" +version = "0.36.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4165c9963ab29e422d6c26fbc1d37f15bace6b2810221f9d925023480fcf0e" +checksum = "14e4d67015953998ad0eb82887a0eb0129e18a7e2f3b7b0f6c422fddcd503d62" dependencies = [ "bitflags", - "errno 0.2.8", + "errno", "io-lifetimes", "libc", "linux-raw-sys 0.1.4", @@ -3538,15 +3461,15 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.11" +version = "0.37.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85597d61f83914ddeba6a47b3b8ffe7365107221c2e557ed94426489fefb5f77" +checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" dependencies = [ "bitflags", - "errno 0.3.1", + "errno", "io-lifetimes", "libc", - "linux-raw-sys 0.3.1", + "linux-raw-sys 0.3.8", "windows-sys 0.48.0", ] @@ -3562,13 +3485,35 @@ dependencies = [ "webpki", ] +[[package]] +name = "rustls" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c911ba11bc8433e811ce56fde130ccf32f5127cab0e0194e9c68c5a5b671791e" +dependencies = [ + "log", + "ring", + "rustls-webpki", + "sct", +] + [[package]] name = "rustls-pemfile" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b" dependencies = [ - "base64 
0.21.0", + "base64 0.21.2", +] + +[[package]] +name = "rustls-webpki" +version = "0.100.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6207cd5ed3d8dca7816f8f3725513a34609c0c765bf652b8c3cb4cfd87db46b" +dependencies = [ + "ring", + "untrusted", ] [[package]] @@ -3630,9 +3575,9 @@ checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" [[package]] name = "serde" -version = "1.0.160" +version = "1.0.163" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" +checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2" dependencies = [ "serde_derive", ] @@ -3648,20 +3593,20 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.160" +version = "1.0.163" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" +checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] name = "serde_json" -version = "1.0.95" +version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744" +checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1" dependencies = [ "indexmap", "itoa", @@ -3671,9 +3616,9 @@ dependencies = [ [[package]] name = "serde_spanned" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0efd8caf556a6cebd3b285caf480045fcc1ac04f6bd786b09a6f11af30c4fcf4" +checksum = "93107647184f6027e3b7dcb2e11034cf95ffa1e3a682c67951963ac69c1c007d" dependencies = [ "serde", ] @@ -3767,9 +3712,9 @@ dependencies = [ [[package]] name = "slice-group-by" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03b634d87b960ab1a38c4fe143b508576f075e7c978bfad18217645ebfdfa2ec" +checksum = "826167069c09b99d56f31e9ae5c99049e932a98c9dc2dac47645b08dbbf76ba7" [[package]] name = "smallstr" @@ -3854,9 +3799,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "subtle" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" [[package]] name = "syn" @@ -3871,9 +3816,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.14" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcf316d5356ed6847742d036f8a39c3b8435cac10bd528a4bd461928a6ab34d5" +checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" dependencies = [ "proc-macro2", "quote", @@ -3929,9 +3874,9 @@ dependencies = [ [[package]] name = "temp-env" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95b343d943e5a0d2221fb73029e8040f3c91d6d06afec86c664682a361681" +checksum = "9547444bfe52cbd79515c6c8087d8ae6ca8d64d2d31a27746320f5cb81d1a15c" dependencies = [ "parking_lot", ] @@ -3945,7 +3890,7 @@ dependencies = [ "cfg-if", "fastrand", "redox_syscall 0.3.5", - "rustix 0.37.11", + "rustix 0.37.19", "windows-sys 0.45.0", ] @@ -3981,14 +3926,14 @@ checksum = 
"f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] name = "time" -version = "0.3.20" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" +checksum = "8f3403384eaacbca9923fa06940178ac13e4edb725486d70e8e15881d0c836cc" dependencies = [ "itoa", "serde", @@ -3998,15 +3943,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" +checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" [[package]] name = "time-macros" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" +checksum = "372950940a5f07bf38dbe211d7283c9e6d7327df53794992d293e534c733d09b" dependencies = [ "time-core", ] @@ -4038,9 +3983,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.27.0" +version = "1.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001" +checksum = "94d7b1cfd2aa4011f2de74c2c4c63665e27a71006b0a192dcd2710272e73dfa2" dependencies = [ "autocfg", "bytes", @@ -4052,18 +3997,18 @@ dependencies = [ "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.45.0", + "windows-sys 0.48.0", ] [[package]] name = "tokio-macros" -version = "2.0.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" +checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] @@ -4072,16 +4017,26 @@ version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" dependencies = [ - "rustls", + "rustls 0.20.8", "tokio", "webpki", ] [[package]] -name = "tokio-stream" -version = "0.1.12" +name = "tokio-rustls" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fb52b74f05dbf495a8fba459fdc331812b96aa086d9eb78101fa0d4569c3313" +checksum = "e0d409377ff5b1e3ca6437aa86c1eb7d40c134bfec254e44c830defa92669db5" +dependencies = [ + "rustls 0.21.1", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" dependencies = [ "futures-core", "pin-project-lite", @@ -4090,9 +4045,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.7" +version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2" +checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" dependencies = [ "bytes", "futures-core", @@ -4104,9 +4059,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b403acf6f2bb0859c93c7f0d967cb4a75a7ac552100f9322faf64dc047669b21" +checksum = "d6135d499e69981f9ff0ef2167955a5333c35e36f6937d382974566b3d5b94ec" dependencies = [ "serde", "serde_spanned", @@ -4116,18 +4071,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ab8ed2edee10b50132aed5f331333428b011c99402b5a534154ed15746f9622" +checksum = "5a76a9312f5ba4c2dec6b9161fdf25d87ad8a09256ccea5a556fef03c706a10f" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.19.8" +version = "0.19.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "239410c8609e8125456927e6707163a3b1fdb40561e4b803bc041f466ccfdc13" +checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739" dependencies = [ "indexmap", "serde", @@ -4156,9 +4111,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" +checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" dependencies = [ "once_cell", ] @@ -4198,15 +4153,15 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-blocks" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9de2be6bad6f56ce8373d377e611cbb2265de3a656138065609ce82e217aad70" +checksum = "943e3f1f50cc455d072e0801ccb71ff893b0c88060b1169f92e35fb5bb881cc6" [[package]] name = "unicode-ident" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" [[package]] name = "unicode-normalization" @@ -4223,12 +4178,6 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" -[[package]] -name = "unicode-width" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" - [[package]] name = "unicode-xid" version = "0.2.4" @@ -4281,9 +4230,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.3.1" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b55a3fef2a1e3b3a00ce878640918820d3c51081576ac657d23af9fc7928fdb" +checksum = "345444e32442451b267fc254ae85a209c64be56d2890e601a0c37ff0c3c5ecd2" dependencies = [ "getrandom", "serde", @@ -4356,9 +4305,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.83" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" +checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -4366,24 +4315,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.83" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" +checksum = 
"19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.18", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.33" +version = "0.4.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23639446165ca5a5de86ae1d8896b737ae80319560fbaa4c2887b7da6e7ebd7d" +checksum = "2d1985d03709c53167ce907ff394f5316aa22cb4e12761295c5dc57dacb6297e" dependencies = [ "cfg-if", "js-sys", @@ -4393,9 +4342,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.83" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" +checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4403,28 +4352,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.83" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" +checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.18", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.83" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" +checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93" [[package]] name = "web-sys" -version = "0.3.60" +version = "0.3.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f" +checksum = "3bdd9ef4e984da1187bf8110c5cf5b845fbc87a23602cdf912386a76fcd3a7c2" dependencies = [ "js-sys", "wasm-bindgen", @@ -4490,21 +4439,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows-sys" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows-sys" version = "0.45.0" @@ -4639,9 +4573,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "winnow" -version = "0.4.1" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae8970b36c66498d8ff1d66685dc86b91b29db0c7739899012f63a63814b4b28" +checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699" dependencies = [ "memchr", ] @@ -4712,9 +4646,9 @@ dependencies = [ [[package]] name = "zip" -version = "0.6.4" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0445d0fbc924bb93539b4316c11afb121ea39296f99a3c4c9edad09e3658cdef" +checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" dependencies = [ "aes", "byteorder", @@ -4745,7 +4679,7 @@ version = 
"0.12.3+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" dependencies = [ - "zstd-safe 6.0.4+zstd.1.5.4", + "zstd-safe 6.0.5+zstd.1.5.4", ] [[package]] @@ -4760,9 +4694,9 @@ dependencies = [ [[package]] name = "zstd-safe" -version = "6.0.4+zstd.1.5.4" +version = "6.0.5+zstd.1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7afb4b54b8910cf5447638cb54bf4e8a65cbedd783af98b98c62ffe91f185543" +checksum = "d56d9e60b4b1758206c238a10165fbcae3ca37b01744e394c463463f6529d23b" dependencies = [ "libc", "zstd-sys", @@ -4770,9 +4704,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.7+zstd.1.5.4" +version = "2.0.8+zstd.1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94509c3ba2fe55294d752b79842c530ccfab760192521df74a081a78d2b3c7f5" +checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" dependencies = [ "cc", "libc", diff --git a/Cargo.toml b/Cargo.toml index b69831b9c..f7e5758d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,8 @@ members = [ "filter-parser", "flatten-serde-json", "json-depth-checker", - "benchmarks" + "benchmarks", + "fuzzers", ] [workspace.package] diff --git a/fuzzers/Cargo.toml b/fuzzers/Cargo.toml new file mode 100644 index 000000000..cbc27a55c --- /dev/null +++ b/fuzzers/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "fuzzers" +publish = false + +version.workspace = true +authors.workspace = true +description.workspace = true +homepage.workspace = true +readme.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +arbitrary = { version = "1.3.0", features = ["derive"] } +clap = { version = "4.3.0", features = ["derive"] } +fastrand = "1.9.0" +milli = { path = "../milli" } +serde = { version = "1.0.160", features = ["derive"] } +serde_json = { version = "1.0.95", features = ["preserve_order"] } +tempfile = "3.5.0" diff --git a/fuzzers/README.md b/fuzzers/README.md new file mode 100644 index 000000000..d9d02de0e --- /dev/null +++ b/fuzzers/README.md @@ -0,0 +1,3 @@ +# Fuzzers + +The purpose of this crate is to contains all the handmade "fuzzer" we may need. diff --git a/fuzzers/src/bin/fuzz.rs b/fuzzers/src/bin/fuzz.rs new file mode 100644 index 000000000..880c7c452 --- /dev/null +++ b/fuzzers/src/bin/fuzz.rs @@ -0,0 +1,136 @@ +use std::num::NonZeroUsize; +use std::path::PathBuf; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::time::Duration; + +use arbitrary::{Arbitrary, Unstructured}; +use clap::Parser; +use fuzzers::Operation; +use milli::heed::EnvOpenOptions; +use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig}; +use milli::Index; +use tempfile::TempDir; + +#[derive(Debug, Arbitrary)] +struct Batch([Operation; 5]); + +#[derive(Debug, Clone, Parser)] +struct Opt { + /// The number of fuzzer to run in parallel. + #[clap(long)] + par: Option, + // We need to put a lot of newlines in the following documentation or else everything gets collapsed on one line + /// The path in which the databases will be created. + /// Using a ramdisk is recommended. 
+    ///
+    /// Linux:
+    ///
+    /// sudo mount -t tmpfs -o size=2g tmpfs ramdisk # to create it
+    ///
+    /// sudo umount ramdisk # to remove it
+    ///
+    /// MacOS:
+    ///
+    /// diskutil erasevolume HFS+ 'RAM Disk' `hdiutil attach -nobrowse -nomount ram://4194304 # create it
+    ///
+    /// hdiutil detach /dev/:the_disk
+    ///
+    #[clap(long)]
+    path: Option<PathBuf>,
+}
+
+fn main() {
+    let opt = Opt::parse();
+    let progression: &'static AtomicUsize = Box::leak(Box::new(AtomicUsize::new(0)));
+
+    let par = opt.par.unwrap_or_else(|| std::thread::available_parallelism().unwrap()).get();
+    let mut handles = Vec::with_capacity(par);
+
+    for _ in 0..par {
+        let opt = opt.clone();
+
+        let handle = std::thread::spawn(move || {
+            let mut options = EnvOpenOptions::new();
+            options.map_size(1024 * 1024 * 1024 * 1024);
+            let tempdir = match opt.path {
+                Some(path) => TempDir::new_in(path).unwrap(),
+                None => TempDir::new().unwrap(),
+            };
+            let index = Index::new(options, tempdir.path()).unwrap();
+            let indexer_config = IndexerConfig::default();
+            let index_documents_config = IndexDocumentsConfig::default();
+
+            loop {
+                let v: Vec<u8> = std::iter::repeat_with(|| fastrand::u8(..)).take(1000).collect();
+
+                let mut data = Unstructured::new(&v);
+                let batches = <[Batch; 5]>::arbitrary(&mut data).unwrap();
+                // will be used to display the error once a thread crashes
+                let dbg_input = format!("{:#?}", batches);
+
+                let mut wtxn = index.write_txn().unwrap();
+
+                for batch in batches {
+                    let mut builder = IndexDocuments::new(
+                        &mut wtxn,
+                        &index,
+                        &indexer_config,
+                        index_documents_config.clone(),
+                        |_| (),
+                        || false,
+                    )
+                    .unwrap();
+
+                    for op in batch.0 {
+                        match op {
+                            Operation::AddDoc(doc) => {
+                                let documents =
+                                    milli::documents::objects_from_json_value(doc.to_d());
+                                let documents =
+                                    milli::documents::documents_batch_reader_from_objects(
+                                        documents,
+                                    );
+                                let (b, _added) =
+                                    builder.add_documents(documents).expect(&dbg_input);
+                                builder = b;
+                            }
+                            Operation::DeleteDoc(id) => {
+                                let (b, _removed) =
+                                    builder.remove_documents(vec![id.to_s()]).unwrap();
+                                builder = b;
+                            }
+                        }
+                    }
+                    builder.execute().expect(&dbg_input);
+
+                    // after executing a batch we check if the database is corrupted
+                    let res = index.search(&wtxn).execute().expect(&dbg_input);
+                    index.documents(&wtxn, res.documents_ids).expect(&dbg_input);
+                    progression.fetch_add(1, Ordering::Relaxed);
+                }
+                wtxn.abort().unwrap();
+            }
+        });
+        handles.push(handle);
+    }
+
+    std::thread::spawn(|| {
+        let mut last_value = 0;
+        let start = std::time::Instant::now();
+        loop {
+            let total = progression.load(Ordering::Relaxed);
+            println!(
+                "Has been running for {:?}. Tested {} new values for a total of {}.",
+                start.elapsed(),
+                total - last_value,
+                total
+            );
+            last_value = total;
+            std::thread::sleep(Duration::from_secs(1));
+        }
+    });
+
+    for handle in handles {
+        handle.join().unwrap();
+    }
+}
diff --git a/fuzzers/src/lib.rs b/fuzzers/src/lib.rs
new file mode 100644
index 000000000..c0eef38e8
--- /dev/null
+++ b/fuzzers/src/lib.rs
@@ -0,0 +1,46 @@
+use arbitrary::Arbitrary;
+use serde_json::{json, Value};
+
+#[derive(Debug, Arbitrary)]
+pub enum Document {
+    One,
+    Two,
+    Three,
+    Four,
+    Five,
+    Six,
+}
+
+impl Document {
+    pub fn to_d(&self) -> Value {
+        match self {
+            Document::One => json!({ "id": 0, "doggo": "bernese" }),
+            Document::Two => json!({ "id": 0, "doggo": "golden" }),
+            Document::Three => json!({ "id": 0, "catto": "jorts" }),
+            Document::Four => json!({ "id": 1, "doggo": "bernese" }),
+            Document::Five => json!({ "id": 1, "doggo": "golden" }),
+            Document::Six => json!({ "id": 1, "catto": "jorts" }),
+        }
+    }
+}
+
+#[derive(Debug, Arbitrary)]
+pub enum DocId {
+    Zero,
+    One,
+}
+
+impl DocId {
+    pub fn to_s(&self) -> String {
+        match self {
+            DocId::Zero => "0".to_string(),
+            DocId::One => "1".to_string(),
+        }
+    }
+}
+
+#[derive(Debug, Arbitrary)]
+pub enum Operation {
+    AddDoc(Document),
+    DeleteDoc(DocId),
+}
diff --git a/milli/Cargo.toml b/milli/Cargo.toml
index f708edc73..acd1d35c4 100644
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -65,13 +65,6 @@ maplit = "1.0.2"
 md5 = "0.7.0"
 rand = {version = "0.8.5", features = ["small_rng"] }

-# fuzzing
-arbitrary = { version = "1.3.0", features = ["derive"] }
-fastrand = "1.9.0"
-
-[target.'cfg(fuzzing)'.dev-dependencies]
-fuzzcheck = "0.12.1"
-
 [features]
 all-tokenizations = ["charabia/default"]

diff --git a/milli/examples/fuzz.rs b/milli/tests/fuzz.rs
similarity index 99%
rename from milli/examples/fuzz.rs
rename to milli/tests/fuzz.rs
index dc79e37b7..3ecc151ad 100644
--- a/milli/examples/fuzz.rs
+++ b/milli/tests/fuzz.rs
@@ -52,7 +52,9 @@ enum Operation {
 #[derive(Debug, Arbitrary)]
 struct Batch([Operation; 5]);

-fn main() {
+#[test]
+#[ignore]
+fn fuzz() {
     let mut options = EnvOpenOptions::new();
     options.map_size(1024 * 1024 * 1024 * 1024);
     let _tempdir = TempDir::new().unwrap();

From 8d40d300a5dc4aac21f0f847ff379cd2ad9a595b Mon Sep 17 00:00:00 2001
From: Tamo
Date: Mon, 29 May 2023 12:37:24 +0200
Subject: [PATCH 48/73] rename the fuzzer to indexing

---
 fuzzers/src/bin/{fuzz.rs => indexing.rs} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename fuzzers/src/bin/{fuzz.rs => indexing.rs} (100%)

diff --git a/fuzzers/src/bin/fuzz.rs b/fuzzers/src/bin/indexing.rs
similarity index 100%
rename from fuzzers/src/bin/fuzz.rs
rename to fuzzers/src/bin/indexing.rs

From 99e9057684853302f85707a5838fd7c85f22cf86 Mon Sep 17 00:00:00 2001
From: Tamo
Date: Mon, 29 May 2023 13:07:06 +0200
Subject: [PATCH 49/73] rename the indexing fuzzer to fuzz-indexing so it
 doesn't collide with other binary names when being called from the root of
 the workspace

---
 fuzzers/src/bin/{indexing.rs => fuzz-indexing.rs} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename fuzzers/src/bin/{indexing.rs => fuzz-indexing.rs} (100%)

diff --git a/fuzzers/src/bin/indexing.rs b/fuzzers/src/bin/fuzz-indexing.rs
similarity index 100%
rename from fuzzers/src/bin/indexing.rs
rename to fuzzers/src/bin/fuzz-indexing.rs

From 67a583bedfb312454bad6684d6ae4fc5dc69965d Mon Sep 17 00:00:00 2001
From: Tamo
Date: Mon, 29 May 2023 13:39:26 +0200
Subject: [PATCH 50/73] handle the panic happening in milli

---
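Note: the diff below survives panics raised inside milli by running each fuzzing
iteration on a scoped thread and joining it. `std::thread::scope` lets the
spawned closure borrow the surrounding locals, and `join()` returns `Err` when
the closure panicked, so the offending input can be attached to the report. A
minimal, self-contained sketch of that pattern, where `process` is a
hypothetical stand-in for the milli indexing call:

fn process(bytes: &[u8]) {
    // Hypothetical stand-in for the indexing work that may panic deep inside milli.
    assert!(!bytes.is_empty(), "refusing to index an empty payload");
}

fn main() {
    let input = vec![1u8, 2, 3];
    // Rendered up front so it can be attached to the panic message if needed.
    let dbg_input = format!("{:?}", input);

    std::thread::scope(|s| {
        // The closure may borrow `input` because the scope outlives the thread.
        let handle = s.spawn(|| process(&input));
        // `join` returns `Err` if the closure panicked; `expect` re-raises the
        // panic together with the input that triggered it.
        handle.join().expect(&dbg_input);
    });
}

Run as-is this exits cleanly; pass an empty `input` to watch `expect` surface
the offending bytes in the panic message.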
 fuzzers/src/bin/fuzz-indexing.rs | 92 +++++++++++++++++---------------
 1 file changed, 48 insertions(+), 44 deletions(-)

diff --git a/fuzzers/src/bin/fuzz-indexing.rs b/fuzzers/src/bin/fuzz-indexing.rs
index 880c7c452..6d8ad61ed 100644
--- a/fuzzers/src/bin/fuzz-indexing.rs
+++ b/fuzzers/src/bin/fuzz-indexing.rs
@@ -34,7 +34,6 @@ struct Opt {
     /// diskutil erasevolume HFS+ 'RAM Disk' `hdiutil attach -nobrowse -nomount ram://4194304 # create it
     ///
     /// hdiutil detach /dev/:the_disk
-    ///
     #[clap(long)]
     path: Option<PathBuf>,
 }

 fn main() {
@@ -60,56 +59,61 @@ fn main() {
             let indexer_config = IndexerConfig::default();
             let index_documents_config = IndexDocumentsConfig::default();

-            loop {
-                let v: Vec<u8> = std::iter::repeat_with(|| fastrand::u8(..)).take(1000).collect();
+            std::thread::scope(|s| {
+                loop {
+                    let v: Vec<u8> =
+                        std::iter::repeat_with(|| fastrand::u8(..)).take(1000).collect();

-                let mut data = Unstructured::new(&v);
-                let batches = <[Batch; 5]>::arbitrary(&mut data).unwrap();
-                // will be used to display the error once a thread crashes
-                let dbg_input = format!("{:#?}", batches);
+                    let mut data = Unstructured::new(&v);
+                    let batches = <[Batch; 5]>::arbitrary(&mut data).unwrap();
+                    // will be used to display the error once a thread crashes
+                    let dbg_input = format!("{:#?}", batches);

-                let mut wtxn = index.write_txn().unwrap();
+                    let handle = s.spawn(|| {
+                        let mut wtxn = index.write_txn().unwrap();

-                for batch in batches {
-                    let mut builder = IndexDocuments::new(
-                        &mut wtxn,
-                        &index,
-                        &indexer_config,
-                        index_documents_config.clone(),
-                        |_| (),
-                        || false,
-                    )
-                    .unwrap();
+                        for batch in batches {
+                            let mut builder = IndexDocuments::new(
+                                &mut wtxn,
+                                &index,
+                                &indexer_config,
+                                index_documents_config.clone(),
+                                |_| (),
+                                || false,
+                            )
+                            .unwrap();

-                    for op in batch.0 {
-                        match op {
-                            Operation::AddDoc(doc) => {
-                                let documents =
-                                    milli::documents::objects_from_json_value(doc.to_d());
-                                let documents =
-                                    milli::documents::documents_batch_reader_from_objects(
-                                        documents,
-                                    );
-                                let (b, _added) =
-                                    builder.add_documents(documents).expect(&dbg_input);
-                                builder = b;
-                            }
-                            Operation::DeleteDoc(id) => {
-                                let (b, _removed) =
-                                    builder.remove_documents(vec![id.to_s()]).unwrap();
-                                builder = b;
-                            }
-                        }
-                    }
-                    builder.execute().expect(&dbg_input);
-
-                    // after executing a batch we check if the database is corrupted
-                    let res = index.search(&wtxn).execute().expect(&dbg_input);
-                    index.documents(&wtxn, res.documents_ids).expect(&dbg_input);
-                    progression.fetch_add(1, Ordering::Relaxed);
+                            for op in batch.0 {
+                                match op {
+                                    Operation::AddDoc(doc) => {
+                                        let documents =
+                                            milli::documents::objects_from_json_value(doc.to_d());
+                                        let documents =
+                                            milli::documents::documents_batch_reader_from_objects(
+                                                documents,
+                                            );
+                                        let (b, _added) = builder.add_documents(documents).unwrap();
+                                        builder = b;
+                                    }
+                                    Operation::DeleteDoc(id) => {
+                                        let (b, _removed) =
+                                            builder.remove_documents(vec![id.to_s()]).unwrap();
+                                        builder = b;
+                                    }
+                                }
+                            }
+                            builder.execute().unwrap();
+
+                            // after executing a batch we check if the database is corrupted
+                            let res = index.search(&wtxn).execute().unwrap();
+                            index.documents(&wtxn, res.documents_ids).unwrap();
+                            progression.fetch_add(1, Ordering::Relaxed);
                         }
-                }
-                wtxn.abort().unwrap();
-            }
+                        wtxn.abort().unwrap();
+                    });
+                    handle.join().expect(&dbg_input);
+                }
+            });
         });
         handles.push(handle);
     }

From 46fa99f486fc328357cc5317723fb5787711f2ee Mon Sep 17 00:00:00 2001
From: Tamo
Date: Mon, 29 May 2023 13:44:32 +0200
Subject: [PATCH 51/73] make the fuzzer stop if an error occurs

---
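Note: the stop flag introduced below is a `Box::leak`ed `AtomicBool`, which
yields a `&'static` reference that any number of threads can poll without an
`Arc`. A minimal sketch of that cooperative-shutdown pattern (the worker count
and the error trigger are made up for illustration):

use std::sync::atomic::{AtomicBool, Ordering};

fn main() {
    // Leaking the box gives the flag a `'static` lifetime, so plain `&`
    // references can be moved into any number of threads.
    let stop: &'static AtomicBool = Box::leak(Box::new(AtomicBool::new(false)));

    let workers: Vec<_> = (0..4)
        .map(|id| {
            std::thread::spawn(move || {
                let mut iterations = 0u64;
                // Every worker polls the flag and exits cooperatively once it is set.
                while !stop.load(Ordering::Relaxed) {
                    iterations += 1;
                    // Simulate one worker hitting an error and telling the others to stop.
                    if id == 0 && iterations == 1_000 {
                        stop.store(true, Ordering::Relaxed);
                    }
                }
                iterations
            })
        })
        .collect();

    for worker in workers {
        println!("worker stopped after {} iterations", worker.join().unwrap());
    }
}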
 fuzzers/src/bin/fuzz-indexing.rs | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fuzzers/src/bin/fuzz-indexing.rs b/fuzzers/src/bin/fuzz-indexing.rs
index 6d8ad61ed..4c6a55575 100644
--- a/fuzzers/src/bin/fuzz-indexing.rs
+++ b/fuzzers/src/bin/fuzz-indexing.rs
@@ -1,6 +1,6 @@
 use std::num::NonZeroUsize;
 use std::path::PathBuf;
-use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 use std::time::Duration;

@@ -41,6 +41,7 @@ struct Opt {
 fn main() {
     let opt = Opt::parse();
     let progression: &'static AtomicUsize = Box::leak(Box::new(AtomicUsize::new(0)));
+    let stop: &'static AtomicBool = Box::leak(Box::new(AtomicBool::new(false)));

     let par = opt.par.unwrap_or_else(|| std::thread::available_parallelism().unwrap()).get();
     let mut handles = Vec::with_capacity(par);
@@ -61,6 +62,9 @@ fn main() {

             std::thread::scope(|s| {
                 loop {
+                    if stop.load(Ordering::Relaxed) {
+                        return;
+                    }
                     let v: Vec<u8> =
                         std::iter::repeat_with(|| fastrand::u8(..)).take(1000).collect();

@@ -111,7 +115,10 @@ fn main() {
                     }
                     wtxn.abort().unwrap();
                 });
-                handle.join().expect(&dbg_input);
+                if let err @ Err(_) = handle.join() {
+                    stop.store(true, Ordering::Relaxed);
+                    err.expect(&dbg_input);
+                }
             }
         });
     });

From 23a5b45ebf9e0902fe6f6598598ab0915abfd21b Mon Sep 17 00:00:00 2001
From: Tamo
Date: Mon, 29 May 2023 14:02:37 +0200
Subject: [PATCH 52/73] drop the old fuzz file

---
 milli/tests/fuzz.rs | 111 --------------------------------------------
 1 file changed, 111 deletions(-)
 delete mode 100644 milli/tests/fuzz.rs

diff --git a/milli/tests/fuzz.rs b/milli/tests/fuzz.rs
deleted file mode 100644
index 3ecc151ad..000000000
--- a/milli/tests/fuzz.rs
+++ /dev/null
@@ -1,111 +0,0 @@
-use arbitrary::{Arbitrary, Unstructured};
-use milli::heed::EnvOpenOptions;
-use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig};
-use milli::Index;
-use serde_json::{json, Value};
-use tempfile::TempDir;
-
-#[derive(Debug, Arbitrary)]
-enum Document {
-    One,
-    Two,
-    Three,
-    Four,
-    Five,
-    Six,
-}
-
-impl Document {
-    pub fn to_d(&self) -> Value {
-        match self {
-            Document::One => json!({ "id": 0, "doggo": "bernese" }),
-            Document::Two => json!({ "id": 0, "doggo": "golden" }),
-            Document::Three => json!({ "id": 0, "catto": "jorts" }),
-            Document::Four => json!({ "id": 1, "doggo": "bernese" }),
-            Document::Five => json!({ "id": 1, "doggo": "golden" }),
-            Document::Six => json!({ "id": 1, "catto": "jorts" }),
-        }
-    }
-}
-
-#[derive(Debug, Arbitrary)]
-enum DocId {
-    Zero,
-    One,
-}
-
-impl DocId {
-    pub fn to_s(&self) -> String {
-        match self {
-            DocId::Zero => "0".to_string(),
-            DocId::One => "1".to_string(),
-        }
-    }
-}
-
-#[derive(Debug, Arbitrary)]
-enum Operation {
-    AddDoc(Document),
-    DeleteDoc(DocId),
-}
-
-#[derive(Debug, Arbitrary)]
-struct Batch([Operation; 5]);
-
-#[test]
-#[ignore]
-fn fuzz() {
-    let mut options = EnvOpenOptions::new();
-    options.map_size(1024 * 1024 * 1024 * 1024);
-    let _tempdir = TempDir::new().unwrap();
-    let index = Index::new(options, _tempdir.path()).unwrap();
-    let indexer_config = IndexerConfig::default();
-    let index_documents_config = IndexDocumentsConfig::default();
-
-    loop {
-        let v: Vec<u8> = std::iter::repeat_with(|| fastrand::u8(..)).take(1000).collect();
-
-        let mut data = Unstructured::new(&v);
-        let batches = <[Batch; 5]>::arbitrary(&mut data).unwrap();
-
-        dbg!(&batches);
-
-        let mut wtxn = index.write_txn().unwrap();
-
-        for batch in batches {
-            let mut builder =
IndexDocuments::new( - &mut wtxn, - &index, - &indexer_config, - index_documents_config.clone(), - |_| (), - || false, - ) - .unwrap(); - - for op in batch.0 { - match op { - Operation::AddDoc(doc) => { - let documents = milli::documents::objects_from_json_value(doc.to_d()); - let documents = - milli::documents::documents_batch_reader_from_objects(documents); - let (b, _added) = builder.add_documents(documents).unwrap(); - builder = b; - } - Operation::DeleteDoc(id) => { - let (b, _removed) = builder.remove_documents(vec![id.to_s()]).unwrap(); - builder = b; - } - } - } - builder.execute().unwrap(); - // wtxn.commit().unwrap(); - - // after executing a batch we check if the database is corrupted - // let rtxn = index.read_txn().unwrap(); - let res = index.search(&wtxn).execute().unwrap(); - index.documents(&wtxn, res.documents_ids).unwrap(); - } - wtxn.abort().unwrap(); - } -} From f03d99690da2c8f07bddaff90b0782e05771d3e0 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 29 May 2023 14:56:15 +0200 Subject: [PATCH 53/73] run the indexing fuzzer on every merge for as long as possible --- .github/workflows/fuzzer-indexing.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .github/workflows/fuzzer-indexing.yml diff --git a/.github/workflows/fuzzer-indexing.yml b/.github/workflows/fuzzer-indexing.yml new file mode 100644 index 000000000..1d01a6ea5 --- /dev/null +++ b/.github/workflows/fuzzer-indexing.yml @@ -0,0 +1,24 @@ +name: Run the indexing fuzzer + +on: + push: + branches: + - main + +jobs: + fuzz: + name: Setup the action + runs-on: ubuntu-latest + timeout-minutes: 4320 # 72h + steps: + - uses: actions/checkout@v3 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + + # Run benchmarks + - name: Run the fuzzer + run: | + cargo run --release --bin fuzz-indexing From 1213ec716467d689bb0699d94cf272cf10acce58 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 29 May 2023 18:37:55 +0200 Subject: [PATCH 54/73] update the dashboard once again --- grafana-dashboards/dashboard.json | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/grafana-dashboards/dashboard.json b/grafana-dashboards/dashboard.json index 19355650b..d00069293 100644 --- a/grafana-dashboards/dashboard.json +++ b/grafana-dashboards/dashboard.json @@ -694,9 +694,9 @@ }, "gridPos": { "h": 11, - "w": 24, - "x": 0, - "y": 29 + "w": 12, + "x": 12, + "y": 18 }, "heatmap": {}, "hideZeroBuckets": false, @@ -711,12 +711,12 @@ "cellGap": 2, "cellValues": {}, "color": { - "exponent": 0.5, + "exponent": 0.4, "fill": "#b4ff00", "mode": "scheme", - "reverse": true, + "reverse": false, "scale": "exponential", - "scheme": "RdYlBu", + "scheme": "Blues", "steps": 128 }, "exemplars": { @@ -838,7 +838,7 @@ "h": 11, "w": 12, "x": 0, - "y": 40 + "y": 29 }, "id": 23, "interval": "5s", @@ -931,7 +931,7 @@ "h": 11, "w": 12, "x": 12, - "y": 40 + "y": 29 }, "id": 24, "interval": "5s", @@ -1024,7 +1024,7 @@ "h": 11, "w": 12, "x": 0, - "y": 51 + "y": 40 }, "id": 25, "interval": "5s", @@ -1068,7 +1068,7 @@ "h": 1, "w": 24, "x": 0, - "y": 62 + "y": 51 }, "id": 12, "panels": [], @@ -1146,7 +1146,7 @@ "h": 11, "w": 12, "x": 0, - "y": 63 + "y": 52 }, "id": 4, "interval": "5s", @@ -1249,7 +1249,7 @@ "h": 11, "w": 12, "x": 12, - "y": 63 + "y": 52 }, "id": 5, "interval": "5s", @@ -1371,6 +1371,6 @@ "timezone": "", "title": "Meilisearch", "uid": "7wcZ94dnz", - "version": 5, + "version": 6, "weekStart": "" } \ No newline at end of file From 
85a80f4f4c2003113b70bee54d151b8c59ff5e05 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 29 May 2023 18:39:34 +0200 Subject: [PATCH 55/73] move the grafana dashboard to the assets directory and upload a basic prometheus scraper to help new users --- .../grafana-dashboard.json | 0 assets/prometheus-basic-scraper.yml | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+) rename grafana-dashboards/dashboard.json => assets/grafana-dashboard.json (100%) create mode 100644 assets/prometheus-basic-scraper.yml diff --git a/grafana-dashboards/dashboard.json b/assets/grafana-dashboard.json similarity index 100% rename from grafana-dashboards/dashboard.json rename to assets/grafana-dashboard.json diff --git a/assets/prometheus-basic-scraper.yml b/assets/prometheus-basic-scraper.yml new file mode 100644 index 000000000..762f0ad43 --- /dev/null +++ b/assets/prometheus-basic-scraper.yml @@ -0,0 +1,19 @@ +global: + scrape_interval: 15s # By default, scrape targets every 15 seconds. + + # Attach these labels to any time series or alerts when communicating with + # external systems (federation, remote storage, Alertmanager). + external_labels: + monitor: 'codelab-monitor' + +# A scrape configuration containing exactly one endpoint to scrape: +# Here it's Prometheus itself. +scrape_configs: + # The job name is added as a label `job=` to any timeseries scraped from this config. + - job_name: 'meilisearch' + + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 5s + + static_configs: + - targets: ['localhost:7700'] \ No newline at end of file From da04edff8c6b032d70fe58500c530d8b44cd04e1 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 30 May 2023 14:49:32 +0200 Subject: [PATCH 56/73] Better use deserialize_unchecked_from to reduce the deserialization time --- .../src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs | 4 ++-- milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs index 1bd132974..01ce523ba 100644 --- a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs @@ -49,7 +49,7 @@ impl CboRoaringBitmapCodec { } else { // Otherwise, it means we used the classic RoaringBitmapCodec and // that the header takes threshold integers. 
-            RoaringBitmap::deserialize_unchecked_from(bytes)
+            RoaringBitmap::deserialize_unchecked_from(bytes)
         }
     }

@@ -69,7 +69,7 @@ impl CboRoaringBitmapCodec {
                     vec.push(integer);
                 }
             } else {
-                roaring |= RoaringBitmap::deserialize_from(bytes.as_ref())?;
+                roaring |= RoaringBitmap::deserialize_unchecked_from(bytes.as_ref())?;
             }
         }

diff --git a/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs
index 8fae9b8fd..6cec0eb44 100644
--- a/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs
@@ -8,7 +8,7 @@ impl heed::BytesDecode<'_> for RoaringBitmapCodec {
     type DItem = RoaringBitmap;

     fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
-        RoaringBitmap::deserialize_from(bytes).ok()
+        RoaringBitmap::deserialize_unchecked_from(bytes).ok()
     }
 }

From 2acc3ec5ee8ae6bd460024f36b16488355427ec9 Mon Sep 17 00:00:00 2001
From: Tamo
Date: Tue, 30 May 2023 15:18:52 +0200
Subject: [PATCH 57/73] fix the type of the document deletion by filter tasks

---
 index-scheduler/src/utils.rs | 2 +-
 meilisearch-types/src/tasks.rs | 3 ---
 meilisearch/src/routes/tasks.rs | 2 +-
 meilisearch/tests/tasks/errors.rs | 6 +++---
 4 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/index-scheduler/src/utils.rs b/index-scheduler/src/utils.rs
index 97f437bed..3971d9116 100644
--- a/index-scheduler/src/utils.rs
+++ b/index-scheduler/src/utils.rs
@@ -466,7 +466,7 @@ impl IndexScheduler {
             }
         }
         Details::DocumentDeletionByFilter { deleted_documents, original_filter: _ } => {
-            assert_eq!(kind.as_kind(), Kind::DocumentDeletionByFilter);
+            assert_eq!(kind.as_kind(), Kind::DocumentDeletion);
             let (index_uid, _) = if let KindWithContent::DocumentDeletionByFilter {
                 ref index_uid,
                 ref filter_expr,
diff --git a/meilisearch-types/src/tasks.rs b/meilisearch-types/src/tasks.rs
index e746a53b8..693ee4242 100644
--- a/meilisearch-types/src/tasks.rs
+++ b/meilisearch-types/src/tasks.rs
@@ -395,7 +395,6 @@ impl std::error::Error for ParseTaskStatusError {}
 pub enum Kind {
     DocumentAdditionOrUpdate,
     DocumentDeletion,
-    DocumentDeletionByFilter,
     SettingsUpdate,
     IndexCreation,
     IndexDeletion,
@@ -412,7 +411,6 @@ impl Kind {
         match self {
             Kind::DocumentAdditionOrUpdate
             | Kind::DocumentDeletion
-            | Kind::DocumentDeletionByFilter
             | Kind::SettingsUpdate
             | Kind::IndexCreation
             | Kind::IndexDeletion
@@ -430,7 +428,6 @@ impl Display for Kind {
         match self {
             Kind::DocumentAdditionOrUpdate => write!(f, "documentAdditionOrUpdate"),
             Kind::DocumentDeletion => write!(f, "documentDeletion"),
-            Kind::DocumentDeletionByFilter => write!(f, "documentDeletionByFilter"),
             Kind::SettingsUpdate => write!(f, "settingsUpdate"),
             Kind::IndexCreation => write!(f, "indexCreation"),
             Kind::IndexDeletion => write!(f, "indexDeletion"),
diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs
index 4a2656982..2713d0988 100644
--- a/meilisearch/src/routes/tasks.rs
+++ b/meilisearch/src/routes/tasks.rs
@@ -730,7 +730,7 @@ mod tests {
         let err = deserr_query_params::(params).unwrap_err();
         snapshot!(meili_snap::json_string!(err), @r###"
         {
-            "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
+            "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type.
Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" diff --git a/meilisearch/tests/tasks/errors.rs b/meilisearch/tests/tasks/errors.rs index 065ff1aa9..830c4c8e7 100644 --- a/meilisearch/tests/tasks/errors.rs +++ b/meilisearch/tests/tasks/errors.rs @@ -97,7 +97,7 @@ async fn task_bad_types() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", + "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" @@ -108,7 +108,7 @@ async fn task_bad_types() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", + "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" @@ -119,7 +119,7 @@ async fn task_bad_types() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", + "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" From 26c7e31f25474114a6dd43753f725057c5ae4645 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Jun 2023 17:57:40 +0000 Subject: [PATCH 58/73] Bump Swatinem/rust-cache from 2.2.1 to 2.4.0 Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.2.1 to 2.4.0. 
- [Release notes](https://github.com/Swatinem/rust-cache/releases) - [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md) - [Commits](https://github.com/Swatinem/rust-cache/compare/v2.2.1...v2.4.0) --- updated-dependencies: - dependency-name: Swatinem/rust-cache dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/test-suite.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index 4319624dd..e363d36c8 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -43,7 +43,7 @@ jobs: toolchain: nightly override: true - name: Cache dependencies - uses: Swatinem/rust-cache@v2.2.1 + uses: Swatinem/rust-cache@v2.4.0 - name: Run cargo check without any default features uses: actions-rs/cargo@v1 with: @@ -65,7 +65,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Cache dependencies - uses: Swatinem/rust-cache@v2.2.1 + uses: Swatinem/rust-cache@v2.4.0 - name: Run cargo check without any default features uses: actions-rs/cargo@v1 with: @@ -146,7 +146,7 @@ jobs: toolchain: stable override: true - name: Cache dependencies - uses: Swatinem/rust-cache@v2.2.1 + uses: Swatinem/rust-cache@v2.4.0 - name: Run tests in debug uses: actions-rs/cargo@v1 with: @@ -165,7 +165,7 @@ jobs: override: true components: clippy - name: Cache dependencies - uses: Swatinem/rust-cache@v2.2.1 + uses: Swatinem/rust-cache@v2.4.0 - name: Run cargo clippy uses: actions-rs/cargo@v1 with: @@ -184,7 +184,7 @@ jobs: override: true components: rustfmt - name: Cache dependencies - uses: Swatinem/rust-cache@v2.2.1 + uses: Swatinem/rust-cache@v2.4.0 - name: Run cargo fmt # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file. # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate From 4cc2988482c0b0ce03430e6b4c0dd4b09e05825d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Jun 2023 17:57:43 +0000 Subject: [PATCH 59/73] Bump svenstaro/upload-release-action from 2.5.0 to 2.6.1 Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 2.5.0 to 2.6.1. - [Release notes](https://github.com/svenstaro/upload-release-action/releases) - [Changelog](https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md) - [Commits](https://github.com/svenstaro/upload-release-action/compare/2.5.0...2.6.1) --- updated-dependencies: - dependency-name: svenstaro/upload-release-action dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- .github/workflows/publish-apt-brew-pkg.yml | 2 +- .github/workflows/publish-binaries.yml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/publish-apt-brew-pkg.yml b/.github/workflows/publish-apt-brew-pkg.yml index e24d8ccf1..f7ab8666b 100644 --- a/.github/workflows/publish-apt-brew-pkg.yml +++ b/.github/workflows/publish-apt-brew-pkg.yml @@ -35,7 +35,7 @@ jobs: - name: Build deb package run: cargo deb -p meilisearch -o target/debian/meilisearch.deb - name: Upload debian pkg to release - uses: svenstaro/upload-release-action@2.5.0 + uses: svenstaro/upload-release-action@2.6.1 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/debian/meilisearch.deb diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index 76558f3b1..c79176439 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -54,7 +54,7 @@ jobs: # No need to upload binaries for dry run (cron) - name: Upload binaries to release if: github.event_name == 'release' - uses: svenstaro/upload-release-action@2.5.0 + uses: svenstaro/upload-release-action@2.6.1 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/release/meilisearch @@ -87,7 +87,7 @@ jobs: # No need to upload binaries for dry run (cron) - name: Upload binaries to release if: github.event_name == 'release' - uses: svenstaro/upload-release-action@2.5.0 + uses: svenstaro/upload-release-action@2.6.1 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/release/${{ matrix.artifact_name }} @@ -121,7 +121,7 @@ jobs: - name: Upload the binary to release # No need to upload binaries for dry run (cron) if: github.event_name == 'release' - uses: svenstaro/upload-release-action@2.5.0 + uses: svenstaro/upload-release-action@2.6.1 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/${{ matrix.target }}/release/meilisearch @@ -183,7 +183,7 @@ jobs: - name: Upload the binary to release # No need to upload binaries for dry run (cron) if: github.event_name == 'release' - uses: svenstaro/upload-release-action@2.5.0 + uses: svenstaro/upload-release-action@2.6.1 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/${{ matrix.target }}/release/meilisearch From a13ed4d0b0a024b3daed2602b466c1b64a5751ca Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Jun 2023 17:57:48 +0000 Subject: [PATCH 60/73] Bump actions/setup-go from 3 to 4 Bumps [actions/setup-go](https://github.com/actions/setup-go) from 3 to 4. - [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/setup-go dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/sdks-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sdks-tests.yml b/.github/workflows/sdks-tests.yml index c3b2c5d87..fedcb9997 100644 --- a/.github/workflows/sdks-tests.yml +++ b/.github/workflows/sdks-tests.yml @@ -144,7 +144,7 @@ jobs: - '7700:7700' steps: - name: Set up Go - uses: actions/setup-go@v3 + uses: actions/setup-go@v4 with: go-version: stable - uses: actions/checkout@v3 From b6b6a80b7619737de39a1e4f25aa75a3d224fdce Mon Sep 17 00:00:00 2001 From: curquiza Date: Tue, 6 Jun 2023 10:38:05 +0200 Subject: [PATCH 61/73] Fix SDK CI for scheduled jobs --- .github/workflows/sdks-tests.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.github/workflows/sdks-tests.yml b/.github/workflows/sdks-tests.yml index fedcb9997..f625ab333 100644 --- a/.github/workflows/sdks-tests.yml +++ b/.github/workflows/sdks-tests.yml @@ -16,8 +16,23 @@ env: MEILI_NO_ANALYTICS: 'true' jobs: + define-docker-image: + runs-on: ubuntu-latest + outputs: + docker-image: ${{ steps.define-image.outputs.docker-image }} + steps: + - uses: actions/checkout@v3 + - name: Define the Docker image we need to use + id: define-image + run: | + event=${{ github.event.action }} + echo "docker-image=nightly" >> $GITHUB_OUTPUT + if [[ $event == 'workflow_dispatch' ]]; then + echo "docker-image=${{ github.event.inputs.docker_image }}" >> $GITHUB_OUTPUT + fi meilisearch-js-tests: + needs: define-docker-image name: JS SDK tests runs-on: ubuntu-latest services: @@ -52,6 +67,7 @@ jobs: run: yarn test:env:browser instant-meilisearch-tests: + needs: define-docker-image name: instant-meilisearch tests runs-on: ubuntu-latest services: @@ -78,6 +94,7 @@ jobs: run: yarn build meilisearch-php-tests: + needs: define-docker-image name: PHP SDK tests runs-on: ubuntu-latest services: @@ -108,6 +125,7 @@ jobs: composer remove --dev guzzlehttp/guzzle http-interop/http-factory-guzzle meilisearch-python-tests: + needs: define-docker-image name: Python SDK tests runs-on: ubuntu-latest services: @@ -132,6 +150,7 @@ jobs: run: pipenv run pytest meilisearch-go-tests: + needs: define-docker-image name: Go SDK tests runs-on: ubuntu-latest services: @@ -161,6 +180,7 @@ jobs: run: go test -v ./... meilisearch-ruby-tests: + needs: define-docker-image name: Ruby SDK tests runs-on: ubuntu-latest services: @@ -185,6 +205,7 @@ jobs: run: bundle exec rspec meilisearch-rust-tests: + needs: define-docker-image name: Rust SDK tests runs-on: ubuntu-latest services: From 3cfd653db1cbed174d6b9d727d38b361db0aac33 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 6 Jun 2023 11:38:41 +0200 Subject: [PATCH 62/73] Apply suggestions from code review Co-authored-by: Louis Dureuil --- index-scheduler/src/index_mapper/mod.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/index-scheduler/src/index_mapper/mod.rs b/index-scheduler/src/index_mapper/mod.rs index 86bec2927..e5bac4d30 100644 --- a/index-scheduler/src/index_mapper/mod.rs +++ b/index-scheduler/src/index_mapper/mod.rs @@ -88,9 +88,16 @@ pub enum IndexStatus { pub struct IndexStats { /// Number of documents in the index. pub number_of_documents: u64, - /// Size of the index' DB, in bytes. + /// Size taken up by the index' DB, in bytes. 
+ /// + /// This includes the size taken by both the used and free pages of the DB, and as the free pages + /// are not returned to the disk after a deletion, this number is typically larger than + /// `used_database_size` that only includes the size of the used pages. pub database_size: u64, - /// Size of the index' DB, in bytes. + /// Size taken by the used pages of the index' DB, in bytes. + /// + /// As the DB backend does not return to the disk the pages that are not currently used by the DB, + /// this value is typically smaller than `database_size`. pub used_database_size: u64, /// Association of every field name with the number of times it occurs in the documents. pub field_distribution: FieldDistribution, From 4a3405afec696b2a947b1ac45e6fdd8dc6e46cea Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 6 Jun 2023 12:28:27 +0200 Subject: [PATCH 63/73] comment the stats method --- index-scheduler/src/index_mapper/mod.rs | 2 +- index-scheduler/src/lib.rs | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/index-scheduler/src/index_mapper/mod.rs b/index-scheduler/src/index_mapper/mod.rs index e5bac4d30..8754e7168 100644 --- a/index-scheduler/src/index_mapper/mod.rs +++ b/index-scheduler/src/index_mapper/mod.rs @@ -91,7 +91,7 @@ pub struct IndexStats { /// Size taken up by the index' DB, in bytes. /// /// This includes the size taken by both the used and free pages of the DB, and as the free pages - /// are not returned to the disk after a deletion, this number is typically larger than + /// are not returned to the disk after a deletion, this number is typically larger than /// `used_database_size` that only includes the size of the used pages. pub database_size: u64, /// Size taken by the used pages of the index' DB, in bytes. diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index c27546c8e..40570c668 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -743,6 +743,10 @@ impl IndexScheduler { Ok(tasks) } + /// The returned structure contains: + /// 1. The name of the property being observed can be `statuses`, `types`, or `indexes`. + /// 2. The name of the specific data related to the property can be `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of the index for the `indexes`, for example. + /// 3. The number of times the properties appeared. pub fn get_stats(&self) -> Result>> { let rtxn = self.read_txn()?; From 8628a0c85694c97b407fe08c49f6a3280cd22d21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 7 Jun 2023 10:02:21 +0200 Subject: [PATCH 64/73] Remove docid_word_positions_db + fix deletion bug That would happen when a word was deleted from all exact attributes but not all regular attributes. 
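Put differently, a word may only be dropped from the words FST once it is
absent from both the regular and the exact word docids databases, which is why
the fix collects `words_to_keep` and `words_to_delete` across both passes and
only removes their difference. A small self-contained illustration of that
invariant (the word and the two passes are made up for the example):

use std::collections::BTreeSet;

fn main() {
    let mut words_to_keep: BTreeSet<String> = BTreeSet::new();
    let mut words_to_delete: BTreeSet<String> = BTreeSet::new();

    // Pass over `exact_word_docids`: "kefir" lost its last docid there,
    // so that pass flags it for deletion...
    words_to_delete.insert("kefir".to_string());
    // ...but the pass over `word_docids` still finds docids for it.
    words_to_keep.insert("kefir".to_string());

    // Only words absent from both databases leave the words FST.
    let gone: Vec<_> = words_to_delete.difference(&words_to_keep).collect();
    assert!(gone.is_empty()); // "kefir" must stay in the words FST
}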
---
 milli/src/index.rs                            |  12 +-
 milli/src/lib.rs                              |  46 ------
 milli/src/snapshot_tests.rs                   |   9 --
 milli/src/update/clear_documents.rs           |   3 -
 milli/src/update/delete_documents.rs          | 146 ++++++++++--------
 .../src/update/index_documents/extract/mod.rs |   2 -
 .../helpers/merge_functions.rs                |   5 -
 .../src/update/index_documents/helpers/mod.rs |   4 +-
 milli/src/update/index_documents/mod.rs       |  11 +-
 .../src/update/index_documents/typed_chunk.rs |  34 +---
 10 files changed, 95 insertions(+), 177 deletions(-)

diff --git a/milli/src/index.rs b/milli/src/index.rs
index 9ea7b628c..0d74e0732 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -21,10 +21,9 @@ use crate::heed_codec::facet::{
 };
 use crate::heed_codec::{ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
 use crate::{
-    default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
-    DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
-    FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
-    Search, U8StrStrCodec, BEU16, BEU32,
+    default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
+    FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
+    Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32,
 };

 pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@@ -111,9 +110,6 @@ pub struct Index {
     /// A prefix of word and all the documents ids containing this prefix, from attributes for which typos are not allowed.
     pub exact_word_prefix_docids: Database<Str, RoaringBitmapCodec>,

-    /// Maps a word and a document id (u32) to all the positions where the given word appears.
-    pub docid_word_positions: Database<BEU32StrCodec, BoRoaringBitmapCodec>,
-
     /// Maps the proximity between a pair of words with all the docids where this relation appears.
     pub word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
     /// Maps the proximity between a pair of word and prefix with all the docids where this relation appears.
@@ -177,7 +173,6 @@ impl Index { let word_prefix_docids = env.create_database(&mut wtxn, Some(WORD_PREFIX_DOCIDS))?; let exact_word_prefix_docids = env.create_database(&mut wtxn, Some(EXACT_WORD_PREFIX_DOCIDS))?; - let docid_word_positions = env.create_database(&mut wtxn, Some(DOCID_WORD_POSITIONS))?; let word_pair_proximity_docids = env.create_database(&mut wtxn, Some(WORD_PAIR_PROXIMITY_DOCIDS))?; let script_language_docids = @@ -220,7 +215,6 @@ impl Index { exact_word_docids, word_prefix_docids, exact_word_prefix_docids, - docid_word_positions, word_pair_proximity_docids, script_language_docids, word_prefix_pair_proximity_docids, diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 48699e76f..e7acdde2c 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -5,52 +5,6 @@ #[global_allocator] pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; -// #[cfg(test)] -// pub mod allocator { -// use std::alloc::{GlobalAlloc, System}; -// use std::sync::atomic::{self, AtomicI64}; - -// #[global_allocator] -// pub static ALLOC: CountingAlloc = CountingAlloc { -// max_resident: AtomicI64::new(0), -// resident: AtomicI64::new(0), -// allocated: AtomicI64::new(0), -// }; - -// pub struct CountingAlloc { -// pub max_resident: AtomicI64, -// pub resident: AtomicI64, -// pub allocated: AtomicI64, -// } -// unsafe impl GlobalAlloc for CountingAlloc { -// unsafe fn alloc(&self, layout: std::alloc::Layout) -> *mut u8 { -// self.allocated.fetch_add(layout.size() as i64, atomic::Ordering::SeqCst); -// let old_resident = -// self.resident.fetch_add(layout.size() as i64, atomic::Ordering::SeqCst); - -// let resident = old_resident + layout.size() as i64; -// self.max_resident.fetch_max(resident, atomic::Ordering::SeqCst); - -// // if layout.size() > 1_000_000 { -// // eprintln!( -// // "allocating {} with new resident size: {resident}", -// // layout.size() / 1_000_000 -// // ); -// // // let trace = std::backtrace::Backtrace::capture(); -// // // let t = trace.to_string(); -// // // eprintln!("{t}"); -// // } - -// System.alloc(layout) -// } - -// unsafe fn dealloc(&self, ptr: *mut u8, layout: std::alloc::Layout) { -// self.resident.fetch_sub(layout.size() as i64, atomic::Ordering::Relaxed); -// System.dealloc(ptr, layout) -// } -// } -// } - #[macro_use] pub mod documents; diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs index b70bea496..25c1088b9 100644 --- a/milli/src/snapshot_tests.rs +++ b/milli/src/snapshot_tests.rs @@ -89,7 +89,6 @@ Create a snapshot test of the given database. - `exact_word_docids` - `word_prefix_docids` - `exact_word_prefix_docids` - - `docid_word_positions` - `word_pair_proximity_docids` - `word_prefix_pair_proximity_docids` - `word_position_docids` @@ -217,11 +216,6 @@ pub fn snap_exact_word_prefix_docids(index: &Index) -> String { &format!("{s:<16} {}", display_bitmap(&b)) }) } -pub fn snap_docid_word_positions(index: &Index) -> String { - make_db_snap_from_iter!(index, docid_word_positions, |((idx, s), b)| { - &format!("{idx:<6} {s:<16} {}", display_bitmap(&b)) - }) -} pub fn snap_word_pair_proximity_docids(index: &Index) -> String { make_db_snap_from_iter!(index, word_pair_proximity_docids, |((proximity, word1, word2), b)| { &format!("{proximity:<2} {word1:<16} {word2:<16} {}", display_bitmap(&b)) @@ -477,9 +471,6 @@ macro_rules! 
full_snap_of_db { ($index:ident, exact_word_prefix_docids) => {{ $crate::snapshot_tests::snap_exact_word_prefix_docids(&$index) }}; - ($index:ident, docid_word_positions) => {{ - $crate::snapshot_tests::snap_docid_word_positions(&$index) - }}; ($index:ident, word_pair_proximity_docids) => {{ $crate::snapshot_tests::snap_word_pair_proximity_docids(&$index) }}; diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index 147643bad..04119c641 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -23,7 +23,6 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { exact_word_docids, word_prefix_docids, exact_word_prefix_docids, - docid_word_positions, word_pair_proximity_docids, word_prefix_pair_proximity_docids, prefix_word_pair_proximity_docids, @@ -80,7 +79,6 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { exact_word_docids.clear(self.wtxn)?; word_prefix_docids.clear(self.wtxn)?; exact_word_prefix_docids.clear(self.wtxn)?; - docid_word_positions.clear(self.wtxn)?; word_pair_proximity_docids.clear(self.wtxn)?; word_prefix_pair_proximity_docids.clear(self.wtxn)?; prefix_word_pair_proximity_docids.clear(self.wtxn)?; @@ -141,7 +139,6 @@ mod tests { assert!(index.word_docids.is_empty(&rtxn).unwrap()); assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap()); - assert!(index.docid_word_positions.is_empty(&rtxn).unwrap()); assert!(index.word_pair_proximity_docids.is_empty(&rtxn).unwrap()); assert!(index.field_id_word_count_docids.is_empty(&rtxn).unwrap()); assert!(index.word_prefix_pair_proximity_docids.is_empty(&rtxn).unwrap()); diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 311f93f8f..60cd41e8a 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -1,5 +1,5 @@ use std::collections::btree_map::Entry; -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeSet, HashMap, HashSet}; use fst::IntoStreamer; use heed::types::{ByteSlice, DecodeIgnore, Str, UnalignedSlice}; @@ -15,8 +15,7 @@ use crate::facet::FacetType; use crate::heed_codec::facet::FieldDocIdFacetCodec; use crate::heed_codec::CboRoaringBitmapCodec; use crate::{ - ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result, RoaringBitmapCodec, - SmallString32, BEU32, + ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result, RoaringBitmapCodec, BEU32, }; pub struct DeleteDocuments<'t, 'u, 'i> { @@ -232,7 +231,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { exact_word_docids, word_prefix_docids, exact_word_prefix_docids, - docid_word_positions, word_pair_proximity_docids, field_id_word_count_docids, word_prefix_pair_proximity_docids, @@ -251,23 +249,9 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { facet_id_is_empty_docids, documents, } = self.index; - - // Retrieve the words contained in the documents. - let mut words = Vec::new(); + // Remove from the documents database for docid in &self.to_delete_docids { documents.delete(self.wtxn, &BEU32::new(docid))?; - - // We iterate through the words positions of the document id, retrieve the word and delete the positions. - // We create an iterator to be able to get the content and delete the key-value itself. - // It's faster to acquire a cursor to get and delete, as we avoid traversing the LMDB B-Tree two times but only once. 
- let mut iter = docid_word_positions.prefix_iter_mut(self.wtxn, &(docid, ""))?; - while let Some(result) = iter.next() { - let ((_docid, word), _positions) = result?; - // This boolean will indicate if we must remove this word from the words FST. - words.push((SmallString32::from(word), false)); - // safety: we don't keep references from inside the LMDB database. - unsafe { iter.del_current()? }; - } } // We acquire the current external documents ids map... // Note that its soft-deleted document ids field will be equal to the `to_delete_docids` @@ -278,42 +262,27 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { let new_external_documents_ids = new_external_documents_ids.into_static(); self.index.put_external_documents_ids(self.wtxn, &new_external_documents_ids)?; - // Maybe we can improve the get performance of the words - // if we sort the words first, keeping the LMDB pages in cache. - words.sort_unstable(); - + let mut words_to_keep = BTreeSet::default(); + let mut words_to_delete = BTreeSet::default(); // We iterate over the words and delete the documents ids // from the word docids database. - for (word, must_remove) in &mut words { - remove_from_word_docids( - self.wtxn, - word_docids, - word.as_str(), - must_remove, - &self.to_delete_docids, - )?; - - remove_from_word_docids( - self.wtxn, - exact_word_docids, - word.as_str(), - must_remove, - &self.to_delete_docids, - )?; - } + remove_from_word_docids( + self.wtxn, + word_docids, + &self.to_delete_docids, + &mut words_to_keep, + &mut words_to_delete, + )?; + remove_from_word_docids( + self.wtxn, + exact_word_docids, + &self.to_delete_docids, + &mut words_to_keep, + &mut words_to_delete, + )?; // We construct an FST set that contains the words to delete from the words FST. - let words_to_delete = - words.iter().filter_map( - |(word, must_remove)| { - if *must_remove { - Some(word.as_str()) - } else { - None - } - }, - ); - let words_to_delete = fst::Set::from_iter(words_to_delete)?; + let words_to_delete = fst::Set::from_iter(words_to_delete.difference(&words_to_keep))?; let new_words_fst = { // We retrieve the current words FST from the database. @@ -532,23 +501,24 @@ fn remove_from_word_prefix_docids( fn remove_from_word_docids( txn: &mut heed::RwTxn, db: &heed::Database, - word: &str, - must_remove: &mut bool, to_remove: &RoaringBitmap, + words_to_keep: &mut BTreeSet, + words_to_remove: &mut BTreeSet, ) -> Result<()> { // We create an iterator to be able to get the content and delete the word docids. // It's faster to acquire a cursor to get and delete or put, as we avoid traversing // the LMDB B-Tree two times but only once. - let mut iter = db.prefix_iter_mut(txn, word)?; - if let Some((key, mut docids)) = iter.next().transpose()? { - if key == word { - let previous_len = docids.len(); - docids -= to_remove; - if docids.is_empty() { - // safety: we don't keep references from inside the LMDB database. - unsafe { iter.del_current()? }; - *must_remove = true; - } else if docids.len() != previous_len { + let mut iter = db.iter_mut(txn)?; + while let Some((key, mut docids)) = iter.next().transpose()? { + let previous_len = docids.len(); + docids -= to_remove; + if docids.is_empty() { + // safety: we don't keep references from inside the LMDB database. + unsafe { iter.del_current()? }; + words_to_remove.insert(key.to_owned()); + } else { + words_to_keep.insert(key.to_owned()); + if docids.len() != previous_len { let key = key.to_owned(); // safety: we don't keep references from inside the LMDB database. 
unsafe { iter.put_current(&key, &docids)? }; @@ -627,7 +597,7 @@ mod tests { use super::*; use crate::index::tests::TempIndex; - use crate::{db_snap, Filter}; + use crate::{db_snap, Filter, Search}; fn delete_documents<'t>( wtxn: &mut RwTxn<'t, '_>, @@ -1199,4 +1169,52 @@ mod tests { DeletionStrategy::AlwaysSoft, ); } + + #[test] + fn delete_words_exact_attributes() { + let index = TempIndex::new(); + + index + .update_settings(|settings| { + settings.set_primary_key(S("id")); + settings.set_searchable_fields(vec![S("text"), S("exact")]); + settings.set_exact_attributes(vec![S("exact")].into_iter().collect()); + }) + .unwrap(); + + index + .add_documents(documents!([ + { "id": 0, "text": "hello" }, + { "id": 1, "exact": "hello"} + ])) + .unwrap(); + db_snap!(index, word_docids, 1, @r###" + hello [0, ] + "###); + db_snap!(index, exact_word_docids, 1, @r###" + hello [1, ] + "###); + db_snap!(index, words_fst, 1, @"300000000000000001084cfcfc2ce1000000016000000090ea47f"); + + let mut wtxn = index.write_txn().unwrap(); + let deleted_internal_ids = + delete_documents(&mut wtxn, &index, &["1"], DeletionStrategy::AlwaysHard); + wtxn.commit().unwrap(); + + db_snap!(index, word_docids, 2, @r###" + hello [0, ] + "###); + db_snap!(index, exact_word_docids, 2, @""); + db_snap!(index, words_fst, 2, @"300000000000000001084cfcfc2ce1000000016000000090ea47f"); + + insta::assert_snapshot!(format!("{deleted_internal_ids:?}"), @"[1]"); + let txn = index.read_txn().unwrap(); + let words = index.words_fst(&txn).unwrap().into_stream().into_strs().unwrap(); + insta::assert_snapshot!(format!("{words:?}"), @r###"["hello"]"###); + + let mut s = Search::new(&txn, &index); + s.query("hello"); + let crate::SearchResult { documents_ids, .. } = s.execute().unwrap(); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); + } } diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 3df8321bc..632f568ab 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -325,8 +325,6 @@ fn send_and_extract_flattened_documents_data( // send docid_word_positions_chunk to DB writer let docid_word_positions_chunk = unsafe { as_cloneable_grenad(&docid_word_positions_chunk)? 
}; - let _ = lmdb_writer_sx - .send(Ok(TypedChunk::DocidWordPositions(docid_word_positions_chunk.clone()))); let _ = lmdb_writer_sx.send(Ok(TypedChunk::ScriptLanguageDocids(script_language_pair))); diff --git a/milli/src/update/index_documents/helpers/merge_functions.rs b/milli/src/update/index_documents/helpers/merge_functions.rs index 7b8891a7a..64bee95df 100644 --- a/milli/src/update/index_documents/helpers/merge_functions.rs +++ b/milli/src/update/index_documents/helpers/merge_functions.rs @@ -4,7 +4,6 @@ use std::result::Result as StdResult; use roaring::RoaringBitmap; -use super::read_u32_ne_bytes; use crate::heed_codec::CboRoaringBitmapCodec; use crate::update::index_documents::transform::Operation; use crate::Result; @@ -22,10 +21,6 @@ pub fn concat_u32s_array<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result RoaringBitmap { - read_u32_ne_bytes(slice).collect() -} - pub fn serialize_roaring_bitmap(bitmap: &RoaringBitmap, buffer: &mut Vec) -> io::Result<()> { buffer.clear(); buffer.reserve(bitmap.serialized_size()); diff --git a/milli/src/update/index_documents/helpers/mod.rs b/milli/src/update/index_documents/helpers/mod.rs index ce6a2abe9..95e497af4 100644 --- a/milli/src/update/index_documents/helpers/mod.rs +++ b/milli/src/update/index_documents/helpers/mod.rs @@ -14,8 +14,8 @@ pub use grenad_helpers::{ }; pub use merge_functions::{ concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps, - merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs, - roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, MergeFn, + merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs, serialize_roaring_bitmap, + MergeFn, }; use crate::MAX_WORD_LENGTH; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index bbfa1d00c..e45d927c8 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -2471,11 +2471,11 @@ mod tests { { "id": 3, "text": "a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a " } ])) @@ -2513,6 +2513,5 @@ mod tests { db_snap!(index, word_fid_docids, 3, @"4c2e2a1832e5802796edc1638136d933"); db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f"); - db_snap!(index, docid_word_positions, 3, @"5287245332627675740b28bd46e1cde1"); } } diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 53f6d807a..89b10bffe 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -7,24 +7,19 @@ use std::io; use charabia::{Language, Script}; use grenad::MergerBuilder; use heed::types::ByteSlice; -use heed::{BytesDecode, RwTxn}; +use heed::RwTxn; use roaring::RoaringBitmap; use super::helpers::{ - self, merge_ignore_values, roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, - valid_lmdb_key, CursorClonableMmap, + self, merge_ignore_values, serialize_roaring_bitmap, 
valid_lmdb_key, CursorClonableMmap, }; use super::{ClonableMmap, MergeFn}; use crate::facet::FacetType; use crate::update::facet::FacetsUpdate; use crate::update::index_documents::helpers::as_cloneable_grenad; -use crate::{ - lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, - Result, -}; +use crate::{lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result}; pub(crate) enum TypedChunk { - DocidWordPositions(grenad::Reader), FieldIdDocidFacetStrings(grenad::Reader), FieldIdDocidFacetNumbers(grenad::Reader), Documents(grenad::Reader), @@ -56,29 +51,6 @@ pub(crate) fn write_typed_chunk_into_index( ) -> Result<(RoaringBitmap, bool)> { let mut is_merged_database = false; match typed_chunk { - TypedChunk::DocidWordPositions(docid_word_positions_iter) => { - write_entries_into_database( - docid_word_positions_iter, - &index.docid_word_positions, - wtxn, - index_is_empty, - |value, buffer| { - // ensure that values are unique and ordered - let positions = roaring_bitmap_from_u32s_array(value); - BoRoaringBitmapCodec::serialize_into(&positions, buffer); - Ok(buffer) - }, - |new_values, db_values, buffer| { - let new_values = roaring_bitmap_from_u32s_array(new_values); - let positions = match BoRoaringBitmapCodec::bytes_decode(db_values) { - Some(db_values) => new_values | db_values, - None => new_values, // should not happen - }; - BoRoaringBitmapCodec::serialize_into(&positions, buffer); - Ok(()) - }, - )?; - } TypedChunk::Documents(obkv_documents_iter) => { let mut cursor = obkv_documents_iter.into_cursor()?; while let Some((key, value)) = cursor.move_on_next()? { From c15c076da95a87db9fb62caff4308427cdfe4824 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 8 Jun 2023 11:30:35 +0200 Subject: [PATCH 65/73] DB BREAKING: Count the number of words in field_id_word_count_docids --- .../extract/extract_fid_word_count_docids.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs index 315ebdf0c..6952eb484 100644 --- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs +++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; use std::fs::File; -use std::{cmp, io}; +use std::io; use grenad::Sorter; @@ -54,11 +54,10 @@ pub fn extract_fid_word_count_docids( } for position in read_u32_ne_bytes(value) { - let (field_id, position) = relative_from_absolute_position(position); - let word_count = position as u32 + 1; + let (field_id, _) = relative_from_absolute_position(position); let value = document_fid_wordcount.entry(field_id as FieldId).or_insert(0); - *value = cmp::max(*value, word_count); + *value += 1; } } From 9f37b61666363bf87175ad8722b1cd3929fd0160 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 8 Jun 2023 11:31:38 +0200 Subject: [PATCH 66/73] DB BREAKING: raise limit of word count from 10 to 30. 
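This limit gates which (field_id, word_count) pairs are written to the
`field_id_word_count_docids` database: a field whose word count exceeds the
limit gets no entry at all, so consumers such as the exact-attribute checks
cannot see it. Now that the previous commit stores a real word count rather
than a maximum position, a 10-word field is easy to exceed. As a minimal
sketch of the read side (illustrative only, not part of this patch; it
assumes a heed read transaction `rtxn` and the `(FieldId, u8)` key layout of
`FieldIdWordCountCodec`, with `field_id` and `word_count` as hypothetical
variables):

    // Docids whose field `field_id` contains exactly `word_count` words.
    // `None` means the count was over the limit at indexing time, so such
    // documents can never be retrieved through this database.
    let docids = index
        .field_id_word_count_docids
        .get(rtxn, &(field_id, word_count))?
        .unwrap_or_default();

Raising the limit to 30 keeps the count in a single key byte while covering
fields of a more realistic length.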
--- .../index_documents/extract/extract_fid_word_count_docids.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs index 6952eb484..fe8eb93ed 100644 --- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs +++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs @@ -82,7 +82,7 @@ fn drain_document_fid_wordcount_into_sorter( let mut key_buffer = Vec::new(); for (fid, count) in document_fid_wordcount.drain() { - if count <= 10 { + if count <= 30 { key_buffer.clear(); key_buffer.extend_from_slice(&fid.to_be_bytes()); key_buffer.push(count as u8); From a2a3b8c9739e6d04dc8e4fbd4f31afff4163fd4b Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 6 Jun 2023 15:08:13 +0200 Subject: [PATCH 67/73] Fix offset difference between query and indexing for hard separators --- milli/src/search/new/query_term/parse_query.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs index 69c2cd9c9..6f146b208 100644 --- a/milli/src/search/new/query_term/parse_query.rs +++ b/milli/src/search/new/query_term/parse_query.rs @@ -79,7 +79,7 @@ pub fn located_query_terms_from_tokens( TokenKind::Separator(separator_kind) => { // add penalty for hard separators if let SeparatorKind::Hard = separator_kind { - position = position.wrapping_add(1); + position = position.wrapping_add(7); } phrase = 'phrase: { From 4e81445d42c4bde0f157d7595030161f18c81f8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Mon, 12 Jun 2023 15:30:51 +0200 Subject: [PATCH 68/73] Stop the fuzzer after an hour --- fuzzers/src/bin/fuzz-indexing.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fuzzers/src/bin/fuzz-indexing.rs b/fuzzers/src/bin/fuzz-indexing.rs index 4c6a55575..1d53e069c 100644 --- a/fuzzers/src/bin/fuzz-indexing.rs +++ b/fuzzers/src/bin/fuzz-indexing.rs @@ -130,9 +130,14 @@ fn main() { let start = std::time::Instant::now(); loop { let total = progression.load(Ordering::Relaxed); + let elapsed = start.elapsed().as_secs(); + if elapsed > 3600 { + // after 1 hour, stop the fuzzer, success + std::process::exit(0); + } println!( - "Has been running for {:?}. Tested {} new values for a total of {}.", - start.elapsed(), + "Has been running for {:?} seconds. 
Tested {} new values for a total of {}.", + elapsed, total - last_value, total ); From 2da86b31a6097d0b4bb44de54d0ebe69fcd1315f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 6 Jun 2023 11:31:26 +0200 Subject: [PATCH 69/73] Remove comments and add documentation --- milli/src/search/new/distinct.rs | 1 - milli/src/search/new/exact_attribute.rs | 2 +- milli/src/search/new/interner.rs | 2 +- milli/src/search/new/limits.rs | 1 - milli/src/search/new/query_graph.rs | 4 +- milli/src/search/new/query_term/mod.rs | 4 - .../src/search/new/query_term/parse_query.rs | 3 - .../new/ranking_rule_graph/cheapest_paths.rs | 109 +++++++++++++++--- .../condition_docids_cache.rs | 6 +- .../new/ranking_rule_graph/dead_ends_cache.rs | 3 +- .../search/new/ranking_rule_graph/fid/mod.rs | 9 +- .../new/ranking_rule_graph/position/mod.rs | 7 +- .../proximity/compute_docids.rs | 16 +-- milli/src/search/new/resolve_query_graph.rs | 10 -- milli/src/search/new/sort.rs | 4 - milli/src/search/new/tests/distinct.rs | 2 +- milli/src/search/new/tests/proximity.rs | 16 +-- milli/src/search/new/tests/proximity_typo.rs | 7 +- milli/src/search/new/tests/typo.rs | 4 +- 19 files changed, 117 insertions(+), 93 deletions(-) diff --git a/milli/src/search/new/distinct.rs b/milli/src/search/new/distinct.rs index fbb7550a9..fff96bd5d 100644 --- a/milli/src/search/new/distinct.rs +++ b/milli/src/search/new/distinct.rs @@ -26,7 +26,6 @@ pub fn apply_distinct_rule( ctx: &mut SearchContext, field_id: u16, candidates: &RoaringBitmap, - // TODO: add a universe here, such that the `excluded` are a subset of the universe? ) -> Result { let mut excluded = RoaringBitmap::new(); let mut remaining = RoaringBitmap::new(); diff --git a/milli/src/search/new/exact_attribute.rs b/milli/src/search/new/exact_attribute.rs index dc9c95d3d..6e0381295 100644 --- a/milli/src/search/new/exact_attribute.rs +++ b/milli/src/search/new/exact_attribute.rs @@ -206,7 +206,7 @@ impl State { )?; intersection &= &candidates; if !intersection.is_empty() { - // TODO: although not really worth it in terms of performance, + // Although not really worth it in terms of performance, // if would be good to put this in cache for the sake of consistency let candidates_with_exact_word_count = if count_all_positions < u8::MAX as usize { ctx.index diff --git a/milli/src/search/new/interner.rs b/milli/src/search/new/interner.rs index ebf18f38c..c2d325a86 100644 --- a/milli/src/search/new/interner.rs +++ b/milli/src/search/new/interner.rs @@ -32,7 +32,7 @@ impl Interned { #[derive(Clone)] pub struct DedupInterner { stable_store: Vec, - lookup: FxHashMap>, // TODO: Arc + lookup: FxHashMap>, } impl Default for DedupInterner { fn default() -> Self { diff --git a/milli/src/search/new/limits.rs b/milli/src/search/new/limits.rs index 33a5a4a6c..d08946424 100644 --- a/milli/src/search/new/limits.rs +++ b/milli/src/search/new/limits.rs @@ -1,5 +1,4 @@ /// Maximum number of tokens we consider in a single search. -// TODO: Loic, find proper value here so we don't overflow the interner. pub const MAX_TOKEN_COUNT: usize = 1_000; /// Maximum number of prefixes that can be derived from a single word. diff --git a/milli/src/search/new/query_graph.rs b/milli/src/search/new/query_graph.rs index dc25d1bc3..114eb8c4e 100644 --- a/milli/src/search/new/query_graph.rs +++ b/milli/src/search/new/query_graph.rs @@ -92,7 +92,7 @@ impl QueryGraph { /// which contains ngrams. 
pub fn from_query( ctx: &mut SearchContext, - // NOTE: the terms here must be consecutive + // The terms here must be consecutive terms: &[LocatedQueryTerm], ) -> Result<(QueryGraph, Vec)> { let mut new_located_query_terms = terms.to_vec(); @@ -103,7 +103,7 @@ impl QueryGraph { let root_node = 0; let end_node = 1; - // TODO: we could consider generalizing to 4,5,6,7,etc. ngrams + // We could consider generalizing to 4,5,6,7,etc. ngrams let (mut prev2, mut prev1, mut prev0): (Vec, Vec, Vec) = (vec![], vec![], vec![root_node]); diff --git a/milli/src/search/new/query_term/mod.rs b/milli/src/search/new/query_term/mod.rs index fb749a797..8db843037 100644 --- a/milli/src/search/new/query_term/mod.rs +++ b/milli/src/search/new/query_term/mod.rs @@ -132,7 +132,6 @@ impl QueryTermSubset { if full_query_term.ngram_words.is_some() { return None; } - // TODO: included in subset if let Some(phrase) = full_query_term.zero_typo.phrase { self.zero_typo_subset.contains_phrase(phrase).then_some(ExactTerm::Phrase(phrase)) } else if let Some(word) = full_query_term.zero_typo.exact { @@ -182,7 +181,6 @@ impl QueryTermSubset { let word = match &self.zero_typo_subset { NTypoTermSubset::All => Some(use_prefix_db), NTypoTermSubset::Subset { words, phrases: _ } => { - // TODO: use a subset of prefix words instead if words.contains(&use_prefix_db) { Some(use_prefix_db) } else { @@ -204,7 +202,6 @@ impl QueryTermSubset { ctx: &mut SearchContext, ) -> Result> { let mut result = BTreeSet::default(); - // TODO: a compute_partially funtion if !self.one_typo_subset.is_empty() || !self.two_typo_subset.is_empty() { self.original.compute_fully_if_needed(ctx)?; } @@ -300,7 +297,6 @@ impl QueryTermSubset { let mut result = BTreeSet::default(); if !self.one_typo_subset.is_empty() { - // TODO: compute less than fully if possible self.original.compute_fully_if_needed(ctx)?; } let original = ctx.term_interner.get_mut(self.original); diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs index 6f146b208..5e97d6578 100644 --- a/milli/src/search/new/query_term/parse_query.rs +++ b/milli/src/search/new/query_term/parse_query.rs @@ -139,7 +139,6 @@ pub fn number_of_typos_allowed<'ctx>( let min_len_one_typo = ctx.index.min_word_len_one_typo(ctx.txn)?; let min_len_two_typos = ctx.index.min_word_len_two_typos(ctx.txn)?; - // TODO: should `exact_words` also disable prefix search, ngrams, split words, or synonyms? let exact_words = ctx.index.exact_words(ctx.txn)?; Ok(Box::new(move |word: &str| { @@ -250,8 +249,6 @@ impl PhraseBuilder { } else { // token has kind Word let word = ctx.word_interner.insert(token.lemma().to_string()); - // TODO: in a phrase, check that every word exists - // otherwise return an empty term self.words.push(Some(word)); } } diff --git a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs index 8fd943e6e..e93a91d29 100644 --- a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs +++ b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs @@ -1,5 +1,48 @@ -#![allow(clippy::too_many_arguments)] +/** Implements a "PathVisitor" which finds all paths of a certain cost +from the START to END node of a ranking rule graph. + +A path is a list of conditions. A condition is the data associated with +an edge, given by the ranking rule. Some edges don't have a condition associated +with them, they are "unconditional". These kinds of edges are used to "skip" a node.
+ +The algorithm uses a depth-first search. It benefits from two main optimisations: +- The list of all possible costs to go from any node to the END node is precomputed +- The `DeadEndsCache` reduces the number of valid paths drastically, by making some edges +untraversable depending on what other edges were selected. + +These two optimisations are meant to avoid traversing edges that wouldn't lead +to a valid path. In practically all cases, we avoid the exponential complexity +that is inherent to depth-first search in a large ranking rule graph. + +The DeadEndsCache is a sort of prefix tree which associates a list of forbidden +conditions to a list of traversed conditions. +For example, the DeadEndsCache could say the following: +- Immediately, from the start, the conditions `[a,b]` are forbidden + - if we take the condition `c`, then the conditions `[e]` are also forbidden + - and if after that, we take `f`, then `[h,i]` are also forbidden + - etc. + - if we take `g`, then `[f]` is also forbidden + - etc. + - etc. +As we traverse the graph, we also traverse the `DeadEndsCache` and keep a list of forbidden +conditions in memory. Then, we know to avoid all edges which have a condition that is forbidden. + +When a path is found from START to END, we give it to the `visit` closure. +This closure takes a mutable reference to the `DeadEndsCache`. This means that +the caller can update this cache. Therefore, we must handle the case where the +DeadEndsCache has been updated. This means potentially backtracking up to the point +where the traversed conditions are all allowed by the new DeadEndsCache. + +The algorithm also implements the `TermsMatchingStrategy` logic. +Some edges are augmented with a list of "nodes_to_skip". Skipping +a node means "reaching this node through an unconditional edge". If we have +already traversed (ie. not skipped) a node that is in this list, then we know that we +can't traverse this edge. Otherwise, we traverse the edge but make sure to skip any +future node that was present in the "nodes_to_skip" list. + +The caller can decide to stop the path finding algorithm +by returning a `ControlFlow::Break` from the `visit` closure. +*/ use std::collections::{BTreeSet, VecDeque}; use std::iter::FromIterator; use std::ops::ControlFlow; @@ -12,30 +55,41 @@ use crate::search::new::query_graph::QueryNode; use crate::search::new::small_bitmap::SmallBitmap; use crate::Result; +/// Closure which processes a path found by the `PathVisitor` type VisitFn<'f, G> = &'f mut dyn FnMut( + // the path as a list of conditions &[Interned<::Condition>], &mut RankingRuleGraph, + // a mutable reference to the DeadEndsCache, to update it in case the given + // path doesn't resolve to any valid document ids &mut DeadEndsCache<::Condition>, ) -> Result>; +/// A structure which is kept but not updated during the traversal of the graph. +/// It can however be updated by the `visit` closure once a valid path has been found. struct VisitorContext<'a, G: RankingRuleGraphTrait> { graph: &'a mut RankingRuleGraph, all_costs_from_node: &'a MappedInterner>, dead_ends_cache: &'a mut DeadEndsCache, } +/// The internal state of the traversal algorithm struct VisitorState { + /// Budget from the current node to the end node remaining_cost: u64, - + /// Previously visited conditions, in order. path: Vec>, - + /// Previously visited conditions, as an efficient and compact set. visited_conditions: SmallBitmap, + /// Previously visited (ie not skipped) nodes, as an efficient and compact set. 
visited_nodes: SmallBitmap, - + /// The conditions that cannot be visited anymore forbidden_conditions: SmallBitmap, - forbidden_conditions_to_nodes: SmallBitmap, + /// The nodes that cannot be visited anymore (they must be skipped) + nodes_to_skip: SmallBitmap, } +/// See module documentation pub struct PathVisitor<'a, G: RankingRuleGraphTrait> { state: VisitorState, ctx: VisitorContext<'a, G>, @@ -56,14 +110,13 @@ impl<'a, G: RankingRuleGraphTrait> PathVisitor<'a, G> { forbidden_conditions: SmallBitmap::for_interned_values_in( &graph.conditions_interner, ), - forbidden_conditions_to_nodes: SmallBitmap::for_interned_values_in( - &graph.query_graph.nodes, - ), + nodes_to_skip: SmallBitmap::for_interned_values_in(&graph.query_graph.nodes), }, ctx: VisitorContext { graph, all_costs_from_node, dead_ends_cache }, } } + /// See module documentation pub fn visit_paths(mut self, visit: VisitFn) -> Result<()> { let _ = self.state.visit_node(self.ctx.graph.query_graph.root_node, visit, &mut self.ctx)?; @@ -72,22 +125,31 @@ impl<'a, G: RankingRuleGraphTrait> PathVisitor<'a, G> { } impl VisitorState { + /// Visits a node: traverse all its valid conditional and unconditional edges. + /// + /// Returns ControlFlow::Break if the path finding algorithm should stop. + /// Returns whether a valid path was found from this node otherwise. fn visit_node( &mut self, from_node: Interned, visit: VisitFn, ctx: &mut VisitorContext, ) -> Result> { + // any valid path will be found from this point + // if a valid path was found, then we know that the DeadEndsCache may have been updated, + // and we will need to do more work to potentially backtrack let mut any_valid = false; let edges = ctx.graph.edges_of_node.get(from_node).clone(); for edge_idx in edges.iter() { + // could be none if the edge was deleted let Some(edge) = ctx.graph.edges_store.get(edge_idx).clone() else { continue }; if self.remaining_cost < edge.cost as u64 { continue; } self.remaining_cost -= edge.cost as u64; + let cf = match edge.condition { Some(condition) => self.visit_condition( condition, @@ -119,6 +181,10 @@ impl VisitorState { Ok(ControlFlow::Continue(any_valid)) } + /// Visits an unconditional edge. + /// + /// Returns ControlFlow::Break if the path finding algorithm should stop. + /// Returns whether a valid path was found from this node otherwise. fn visit_no_condition( &mut self, dest_node: Interned, @@ -134,20 +200,29 @@ impl VisitorState { { return Ok(ControlFlow::Continue(false)); } + // We've reached the END node! if dest_node == ctx.graph.query_graph.end_node { let control_flow = visit(&self.path, ctx.graph, ctx.dead_ends_cache)?; + // We could change the return type of the visit closure such that the caller + // tells us whether the dead ends cache was updated or not. + // Alternatively, maybe the DeadEndsCache should have a generation number + // to it, so that we don't need to play with these booleans at all. match control_flow { ControlFlow::Continue(_) => Ok(ControlFlow::Continue(true)), ControlFlow::Break(_) => Ok(ControlFlow::Break(())), } } else { - let old_fbct = self.forbidden_conditions_to_nodes.clone(); - self.forbidden_conditions_to_nodes.union(edge_new_nodes_to_skip); + let old_fbct = self.nodes_to_skip.clone(); + self.nodes_to_skip.union(edge_new_nodes_to_skip); let cf = self.visit_node(dest_node, visit, ctx)?; - self.forbidden_conditions_to_nodes = old_fbct; + self.nodes_to_skip = old_fbct; Ok(cf) } } + /// Visits a conditional edge. + /// + /// Returns ControlFlow::Break if the path finding algorithm should stop. 
+ /// Returns whether a valid path was found from this node otherwise. fn visit_condition( &mut self, condition: Interned, @@ -159,7 +234,7 @@ impl VisitorState { assert!(dest_node != ctx.graph.query_graph.end_node); if self.forbidden_conditions.contains(condition) - || self.forbidden_conditions_to_nodes.contains(dest_node) + || self.nodes_to_skip.contains(dest_node) || edge_new_nodes_to_skip.intersects(&self.visited_nodes) { return Ok(ControlFlow::Continue(false)); @@ -180,19 +255,19 @@ impl VisitorState { self.visited_nodes.insert(dest_node); self.visited_conditions.insert(condition); - let old_fc = self.forbidden_conditions.clone(); + let old_forb_cond = self.forbidden_conditions.clone(); if let Some(next_forbidden) = ctx.dead_ends_cache.forbidden_conditions_after_prefix(self.path.iter().copied()) { self.forbidden_conditions.union(&next_forbidden); } - let old_fctn = self.forbidden_conditions_to_nodes.clone(); - self.forbidden_conditions_to_nodes.union(edge_new_nodes_to_skip); + let old_nodes_to_skip = self.nodes_to_skip.clone(); + self.nodes_to_skip.union(edge_new_nodes_to_skip); let cf = self.visit_node(dest_node, visit, ctx)?; - self.forbidden_conditions_to_nodes = old_fctn; - self.forbidden_conditions = old_fc; + self.nodes_to_skip = old_nodes_to_skip; + self.forbidden_conditions = old_forb_cond; self.visited_conditions.remove(condition); self.visited_nodes.remove(dest_node); diff --git a/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs b/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs index d0fcd8bd8..5d199c82a 100644 --- a/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs +++ b/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs @@ -9,12 +9,8 @@ use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::SearchContext; use crate::Result; -// TODO: give a generation to each universe, then be able to get the exact -// delta of docids between two universes of different generations! - /// A cache storing the document ids associated with each ranking rule edge pub struct ConditionDocIdsCache { - // TOOD: should be a mapped interner? pub cache: FxHashMap, ComputedCondition>, _phantom: PhantomData, } @@ -54,7 +50,7 @@ impl ConditionDocIdsCache { } let condition = graph.conditions_interner.get_mut(interned_condition); let computed = G::resolve_condition(ctx, condition, universe)?; - // TODO: if computed.universe_len != universe.len() ? + // Can we put an assert here for computed.universe_len == universe.len() ? 
let _ = self.cache.insert(interned_condition, computed); let computed = &self.cache[&interned_condition]; Ok(computed) diff --git a/milli/src/search/new/ranking_rule_graph/dead_ends_cache.rs b/milli/src/search/new/ranking_rule_graph/dead_ends_cache.rs index 4bbf91fcd..bac25da82 100644 --- a/milli/src/search/new/ranking_rule_graph/dead_ends_cache.rs +++ b/milli/src/search/new/ranking_rule_graph/dead_ends_cache.rs @@ -2,6 +2,7 @@ use crate::search::new::interner::{FixedSizeInterner, Interned}; use crate::search::new::small_bitmap::SmallBitmap; pub struct DeadEndsCache { + // conditions and next could/should be part of the same vector conditions: Vec>, next: Vec, pub forbidden: SmallBitmap, @@ -27,7 +28,7 @@ impl DeadEndsCache { self.forbidden.insert(condition); } - pub fn advance(&mut self, condition: Interned) -> Option<&mut Self> { + fn advance(&mut self, condition: Interned) -> Option<&mut Self> { if let Some(idx) = self.conditions.iter().position(|c| *c == condition) { Some(&mut self.next[idx]) } else { diff --git a/milli/src/search/new/ranking_rule_graph/fid/mod.rs b/milli/src/search/new/ranking_rule_graph/fid/mod.rs index 0f2cceaec..e3ccf23fa 100644 --- a/milli/src/search/new/ranking_rule_graph/fid/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/fid/mod.rs @@ -69,14 +69,9 @@ impl RankingRuleGraphTrait for FidGraph { let mut edges = vec![]; for fid in all_fields { - // TODO: We can improve performances and relevancy by storing - // the term subsets associated to each field ids fetched. edges.push(( - fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10. - conditions_interner.insert(FidCondition { - term: term.clone(), // TODO remove this ugly clone - fid, - }), + fid as u32 * term.term_ids.len() as u32, + conditions_interner.insert(FidCondition { term: term.clone(), fid }), )); } diff --git a/milli/src/search/new/ranking_rule_graph/position/mod.rs b/milli/src/search/new/ranking_rule_graph/position/mod.rs index 9b0b6478f..c2e3b9012 100644 --- a/milli/src/search/new/ranking_rule_graph/position/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/position/mod.rs @@ -94,14 +94,9 @@ impl RankingRuleGraphTrait for PositionGraph { let mut edges = vec![]; for (cost, positions) in positions_for_costs { - // TODO: We can improve performances and relevancy by storing - // the term subsets associated to each position fetched edges.push(( cost, - conditions_interner.insert(PositionCondition { - term: term.clone(), // TODO remove this ugly clone - positions, - }), + conditions_interner.insert(PositionCondition { term: term.clone(), positions }), )); } diff --git a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs index 057779a22..29a1876b4 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs @@ -65,13 +65,6 @@ pub fn compute_docids( } } - // TODO: add safeguard in case the cartesian product is too large! - // even if we restrict the word derivations to a maximum of 100, the size of the - // caterisan product could reach a maximum of 10_000 derivations, which is way too much. - // Maybe prioritise the product of zero typo derivations, then the product of zero-typo/one-typo - // + one-typo/zero-typo, then one-typo/one-typo, then ... until an arbitrary limit has been - // reached - for (left_phrase, left_word) in last_words_of_term_derivations(ctx, &left_term.term_subset)? 
{ // Before computing the edges, check that the left word and left phrase // aren't disjoint with the universe, but only do it if there is more than @@ -111,8 +104,6 @@ pub fn compute_docids( Ok(ComputedCondition { docids, universe_len: universe.len(), - // TODO: think about whether we want to reduce the subset, - // we probably should! start_term_subset: Some(left_term.clone()), end_term_subset: right_term.clone(), }) @@ -203,12 +194,7 @@ fn compute_non_prefix_edges( *docids |= new_docids; } } - if backward_proximity >= 1 - // TODO: for now, we don't do any swapping when either term is a phrase - // but maybe we should. We'd need to look at the first/last word of the phrase - // depending on the context. - && left_phrase.is_none() && right_phrase.is_none() - { + if backward_proximity >= 1 && left_phrase.is_none() && right_phrase.is_none() { if let Some(new_docids) = ctx.get_db_word_pair_proximity_docids(word2, word1, backward_proximity)? { diff --git a/milli/src/search/new/resolve_query_graph.rs b/milli/src/search/new/resolve_query_graph.rs index 797db5875..d992cd22f 100644 --- a/milli/src/search/new/resolve_query_graph.rs +++ b/milli/src/search/new/resolve_query_graph.rs @@ -33,8 +33,6 @@ pub fn compute_query_term_subset_docids( ctx: &mut SearchContext, term: &QueryTermSubset, ) -> Result { - // TODO Use the roaring::MultiOps trait - let mut docids = RoaringBitmap::new(); for word in term.all_single_words_except_prefix_db(ctx)? { if let Some(word_docids) = ctx.word_docids(word)? { @@ -59,8 +57,6 @@ pub fn compute_query_term_subset_docids_within_field_id( term: &QueryTermSubset, fid: u16, ) -> Result { - // TODO Use the roaring::MultiOps trait - let mut docids = RoaringBitmap::new(); for word in term.all_single_words_except_prefix_db(ctx)? { if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(word.interned(), fid)? { @@ -71,7 +67,6 @@ pub fn compute_query_term_subset_docids_within_field_id( for phrase in term.all_phrases(ctx)? { // There may be false positives when resolving a phrase, so we're not // guaranteed that all of its words are within a single fid. - // TODO: fix this? if let Some(word) = phrase.words(ctx).iter().flatten().next() { if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(*word, fid)? { docids |= ctx.get_phrase_docids(phrase)? & word_fid_docids; @@ -95,7 +90,6 @@ pub fn compute_query_term_subset_docids_within_position( term: &QueryTermSubset, position: u16, ) -> Result { - // TODO Use the roaring::MultiOps trait let mut docids = RoaringBitmap::new(); for word in term.all_single_words_except_prefix_db(ctx)? { if let Some(word_position_docids) = @@ -108,7 +102,6 @@ pub fn compute_query_term_subset_docids_within_position( for phrase in term.all_phrases(ctx)? { // It's difficult to know the expected position of the words in the phrase, // so instead we just check the first one. - // TODO: fix this? if let Some(word) = phrase.words(ctx).iter().flatten().next() { if let Some(word_position_docids) = ctx.get_db_word_position_docids(*word, position)? { docids |= ctx.get_phrase_docids(phrase)? 
& word_position_docids @@ -132,9 +125,6 @@ pub fn compute_query_graph_docids( q: &QueryGraph, universe: &RoaringBitmap, ) -> Result { - // TODO: there must be a faster way to compute this big - // roaring bitmap expression - let mut nodes_resolved = SmallBitmap::for_interned_values_in(&q.nodes); let mut path_nodes_docids = q.nodes.map(|_| RoaringBitmap::new()); diff --git a/milli/src/search/new/sort.rs b/milli/src/search/new/sort.rs index 53144d00d..3f57b2aa5 100644 --- a/milli/src/search/new/sort.rs +++ b/milli/src/search/new/sort.rs @@ -141,10 +141,6 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, universe: &RoaringBitmap, ) -> Result>> { let iter = self.iter.as_mut().unwrap(); - // TODO: we should make use of the universe in the function below - // good for correctness, but ideally iter.next_bucket would take the current universe into account, - // as right now it could return buckets that don't intersect with the universe, meaning we will make many - // unneeded calls. if let Some(mut bucket) = iter.next_bucket()? { bucket.candidates &= universe; Ok(Some(bucket)) diff --git a/milli/src/search/new/tests/distinct.rs b/milli/src/search/new/tests/distinct.rs index 2c147d514..c54600f27 100644 --- a/milli/src/search/new/tests/distinct.rs +++ b/milli/src/search/new/tests/distinct.rs @@ -527,7 +527,7 @@ fn test_distinct_all_candidates() { let SearchResult { documents_ids, candidates, .. } = s.execute().unwrap(); let candidates = candidates.iter().collect::>(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]"); - // TODO: this is incorrect! + // This is incorrect, but unfortunately impossible to do better efficiently. insta::assert_snapshot!(format!("{candidates:?}"), @"[1, 4, 7, 8, 14, 17, 19, 20, 23, 24, 25, 26]"); } diff --git a/milli/src/search/new/tests/proximity.rs b/milli/src/search/new/tests/proximity.rs index 401508866..6e4181a95 100644 --- a/milli/src/search/new/tests/proximity.rs +++ b/milli/src/search/new/tests/proximity.rs @@ -122,11 +122,11 @@ fn create_edge_cases_index() -> TempIndex { sta stb stc ste stf stg sth sti stj stk stl stm stn sto stp stq str stst stt stu stv stw stx sty stz " }, - // The next 5 documents lay out a trap with the split word, phrase search, or synonym `sun flower`. - // If the search query is "sunflower", the split word "Sun Flower" will match some documents. + // The next 5 documents lay out a trap with the split word, phrase search, or synonym `sun flower`. + // If the search query is "sunflower", the split word "Sun Flower" will match some documents. // If the query is `sunflower wilting`, then we should make sure that - // the sprximity condition `flower wilting: sprx N` also comes with the condition - // `sun wilting: sprx N+1`. TODO: this is not the exact condition we use for now. + // the proximity condition `flower wilting: sprx N` also comes with the condition + // `sun wilting: sprx N+1`, but this is not the exact condition we use for now. // We only check that the phrase `sun flower` exists and `flower wilting: sprx N`, which // is better than nothing but not the best. 
{ @@ -139,7 +139,7 @@ fn create_edge_cases_index() -> TempIndex { }, { "id": 3, - // This document matches the query `sunflower wilting`, but the sprximity condition + // This document matches the query `sunflower wilting`, but the sprximity condition // between `sunflower` and `wilting` cannot be through the split-word `Sun Flower` // which would reduce to only `flower` and `wilting` being in sprximity. "text": "A flower wilting under the sun, unlike a sunflower" @@ -299,7 +299,7 @@ fn test_proximity_split_word() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 5, 1, 3]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); - // TODO: "2" and "4" should be swapped ideally + // "2" and "4" should be swapped ideally insta::assert_debug_snapshot!(texts, @r###" [ "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"", @@ -316,7 +316,7 @@ fn test_proximity_split_word() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); - // TODO: "2" and "4" should be swapped ideally + // "2" and "4" should be swapped ideally insta::assert_debug_snapshot!(texts, @r###" [ "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"", @@ -341,7 +341,7 @@ fn test_proximity_split_word() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); - // TODO: "2" and "4" should be swapped ideally + // "2" and "4" should be swapped ideally insta::assert_debug_snapshot!(texts, @r###" [ "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"", diff --git a/milli/src/search/new/tests/proximity_typo.rs b/milli/src/search/new/tests/proximity_typo.rs index ab98f99c0..b459b178b 100644 --- a/milli/src/search/new/tests/proximity_typo.rs +++ b/milli/src/search/new/tests/proximity_typo.rs @@ -2,9 +2,8 @@ This module tests the interactions between the proximity and typo ranking rules. The proximity ranking rule should transform the query graph such that it -only contains the word pairs that it used to compute its bucket. - -TODO: This is not currently implemented. +only contains the word pairs that it used to compute its bucket, but this is not currently +implemented. */ use crate::index::tests::TempIndex; @@ -64,7 +63,7 @@ fn test_trap_basic() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); - // TODO: this is incorrect, 1 should come before 0 + // This is incorrect, 1 should come before 0 insta::assert_debug_snapshot!(texts, @r###" [ "\"summer. holiday. sommer holidty\"", diff --git a/milli/src/search/new/tests/typo.rs b/milli/src/search/new/tests/typo.rs index 8fd9de5fc..536f6653d 100644 --- a/milli/src/search/new/tests/typo.rs +++ b/milli/src/search/new/tests/typo.rs @@ -571,8 +571,8 @@ fn test_typo_synonyms() { s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the fast brownish fox jumps over the lackadaisical dog"); - // TODO: is this correct? interaction of ngrams + synonyms means that the - // multi-word synonyms end up having a typo cost. 
This is probably not what we want. + // The interaction of ngrams + synonyms means that the multi-word synonyms end up having a typo cost. + // This is probably not what we want. let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0, 22]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); From d9b4b399222b135821c1ef83a5ce8236b71d13ed Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 14 Jun 2023 13:30:31 +0200 Subject: [PATCH 70/73] Add trailing pipe to the snapshots so it doesn't end with trailing whitespace --- milli/src/snapshot_tests.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs index 25c1088b9..158f515b8 100644 --- a/milli/src/snapshot_tests.rs +++ b/milli/src/snapshot_tests.rs @@ -318,7 +318,7 @@ pub fn snap_field_distributions(index: &Index) -> String { let rtxn = index.read_txn().unwrap(); let mut snap = String::new(); for (field, count) in index.field_distribution(&rtxn).unwrap() { - writeln!(&mut snap, "{field:<16} {count:<6}").unwrap(); + writeln!(&mut snap, "{field:<16} {count:<6} |").unwrap(); } snap } @@ -328,7 +328,7 @@ pub fn snap_fields_ids_map(index: &Index) -> String { let mut snap = String::new(); for field_id in fields_ids_map.ids() { let name = fields_ids_map.name(field_id).unwrap(); - writeln!(&mut snap, "{field_id:<3} {name:<16}").unwrap(); + writeln!(&mut snap, "{field_id:<3} {name:<16} |").unwrap(); } snap } From e0c4682758a754a1854bcf8868f2012c00d2a963 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 14 Jun 2023 13:30:14 +0200 Subject: [PATCH 71/73] Fix tests --- milli/src/index.rs | 18 +++++++++--------- .../1/field_distribution.snap | 6 +++--- .../field_distribution.snap | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 0d74e0732..1ccef13dd 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1466,9 +1466,9 @@ pub(crate) mod tests { db_snap!(index, field_distribution, @r###" - age 1 - id 2 - name 2 + age 1 | + id 2 | + name 2 | "### ); @@ -1486,9 +1486,9 @@ pub(crate) mod tests { db_snap!(index, field_distribution, @r###" - age 1 - id 2 - name 2 + age 1 | + id 2 | + name 2 | "### ); @@ -1502,9 +1502,9 @@ pub(crate) mod tests { db_snap!(index, field_distribution, @r###" - has_dog 1 - id 2 - name 2 + has_dog 1 | + id 2 | + name 2 | "### ); } diff --git a/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap b/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap index 9b074fb59..1d1d629e6 100644 --- a/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap +++ b/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap @@ -1,7 +1,7 @@ --- source: milli/src/index.rs --- -age 1 -id 2 -name 2 +age 1 | +id 2 | +name 2 | diff --git a/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap b/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap index 9b074fb59..1d1d629e6 100644 --- a/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap +++ b/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap @@ -1,7 +1,7 @@ --- source: milli/src/index.rs --- -age 1 -id 2 -name 2 +age 1 | +id 2 | +name 2 | From bbc9f68ff5009dfcf6e84dc60cdbe3cc3f2a88b3 Mon Sep 17 00:00:00 2001 From: curquiza Date: Mon, 19 Jun 2023 
18:49:15 +0200 Subject: [PATCH 72/73] Use the input from the previous job instead of the workflow dispatch --- .github/workflows/sdks-tests.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/sdks-tests.yml b/.github/workflows/sdks-tests.yml index f625ab333..78c45f1b2 100644 --- a/.github/workflows/sdks-tests.yml +++ b/.github/workflows/sdks-tests.yml @@ -25,7 +25,7 @@ jobs: - name: Define the Docker image we need to use id: define-image run: | - event=${{ github.event.action }} + event=${{ github.event_name }} echo "docker-image=nightly" >> $GITHUB_OUTPUT if [[ $event == 'workflow_dispatch' ]]; then echo "docker-image=${{ github.event.inputs.docker_image }}" >> $GITHUB_OUTPUT @@ -37,7 +37,7 @@ jobs: runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:${{ github.event.inputs.docker_image }} + image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -72,7 +72,7 @@ jobs: runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:${{ github.event.inputs.docker_image }} + image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -99,7 +99,7 @@ jobs: runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:${{ github.event.inputs.docker_image }} + image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -130,7 +130,7 @@ jobs: runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:${{ github.event.inputs.docker_image }} + image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -155,7 +155,7 @@ jobs: runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:${{ github.event.inputs.docker_image }} + image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -185,7 +185,7 @@ jobs: runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:${{ github.event.inputs.docker_image }} + image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -210,7 +210,7 @@ jobs: runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:${{ github.event.inputs.docker_image }} + image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} From cfed349aa37495b07bea0f4fd26842c0ce496d17 Mon Sep 17 00:00:00 2001 From: "Valeriy V. Vorotyntsev" Date: Tue, 30 May 2023 00:59:38 +0300 Subject: [PATCH 73/73] Fix error messages in `check-release.sh` - `check_tag`: Report file name correctly. Use named variables. - Introduce `read_version` helper function. Simplify the implementation. - Show meaningful error message if `GITHUB_REF` is not set or its format is incorrect. 
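For instance, with a mismatched tag the script now names the offending file
explicitly. A sketch of the behaviour (the tag and versions below are
hypothetical):

    $ GITHUB_REF=refs/tags/v1.2.0 .github/scripts/check-release.sh
    Error: the current tag does not match the version in Cargo.toml: found 1.1.0, expected 1.2.0
    Error: the current tag does not match the version in Cargo.lock: found 1.1.0, expected 1.2.0

Both messages go to stderr and the script exits with status 1.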
--- .github/scripts/check-release.sh | 47 ++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/.github/scripts/check-release.sh b/.github/scripts/check-release.sh index b0b272226..d9cb128b5 100644 --- a/.github/scripts/check-release.sh +++ b/.github/scripts/check-release.sh @@ -1,24 +1,41 @@ -#!/bin/bash +#!/usr/bin/env bash +set -eu -o pipefail -# check_tag $current_tag $file_tag $file_name -function check_tag { - if [[ "$1" != "$2" ]]; then - echo "Error: the current tag does not match the version in Cargo.toml: found $2 - expected $1" - ret=1 - fi +check_tag() { + local expected=$1 + local actual=$2 + local filename=$3 + + if [[ $actual != $expected ]]; then + echo >&2 "Error: the current tag does not match the version in $filename: found $actual, expected $expected" + return 1 + fi } +read_version() { + grep '^version = ' | cut -d \" -f 2 +} + +if [[ -z "${GITHUB_REF:-}" ]]; then + echo >&2 "Error: GITHUB_REF is not set" + exit 1 +fi + +if [[ ! "$GITHUB_REF" =~ ^refs/tags/v[0-9]+\.[0-9]+\.[0-9]+(-[a-z0-9]+)?$ ]]; then + echo >&2 "Error: GITHUB_REF is not a valid tag: $GITHUB_REF" + exit 1 +fi + +current_tag=${GITHUB_REF#refs/tags/v} ret=0 -current_tag=${GITHUB_REF#'refs/tags/v'} -file_tag="$(grep '^version = ' Cargo.toml | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')" -check_tag $current_tag $file_tag +toml_tag="$(cat Cargo.toml | read_version)" +check_tag "$current_tag" "$toml_tag" Cargo.toml || ret=1 -lock_file='Cargo.lock' -lock_tag=$(grep -A 1 'name = "meilisearch-auth"' $lock_file | grep version | cut -d '=' -f 2 | tr -d '"' | tr -d ' ') -check_tag $current_tag $lock_tag $lock_file +lock_tag=$(grep -A 1 '^name = "meilisearch-auth"' Cargo.lock | read_version) +check_tag "$current_tag" "$lock_tag" Cargo.lock || ret=1 -if [[ "$ret" -eq 0 ]] ; then - echo 'OK' +if (( ret == 0 )); then + echo 'OK' fi exit $ret