From 81f343a46ae2274178b35740b50ab74affbcfbd9 Mon Sep 17 00:00:00 2001 From: mpostma Date: Fri, 8 Jan 2021 16:23:23 +0100 Subject: [PATCH] add word limit to search queries --- meilisearch-core/src/lib.rs | 1 + meilisearch-core/src/query_tree.rs | 3 +++ meilisearch-http/tests/search.rs | 29 +++++++++++++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/meilisearch-core/src/lib.rs b/meilisearch-core/src/lib.rs index bcdad62b1..947ad5fb7 100644 --- a/meilisearch-core/src/lib.rs +++ b/meilisearch-core/src/lib.rs @@ -39,6 +39,7 @@ pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus pub use meilisearch_types::{DocIndex, DocumentId, Highlight}; pub use meilisearch_schema::Schema; pub use query_words_mapper::QueryWordsMapper; +pub use query_tree::MAX_QUERY_LEN; use compact_arena::SmallArena; use log::{error, trace}; diff --git a/meilisearch-core/src/query_tree.rs b/meilisearch-core/src/query_tree.rs index c2f43818f..5d10e9bef 100644 --- a/meilisearch-core/src/query_tree.rs +++ b/meilisearch-core/src/query_tree.rs @@ -16,6 +16,8 @@ use crate::{store, DocumentId, DocIndex, MResult, FstSetCow}; use crate::automaton::{build_dfa, build_prefix_dfa, build_exact_dfa}; use crate::QueryWordsMapper; +pub const MAX_QUERY_LEN: usize = 10; + #[derive(Clone, PartialEq, Eq, Hash)] pub enum Operation { And(Vec), @@ -181,6 +183,7 @@ fn split_query_string<'a, A: AsRef<[u8]>>(s: &str, stop_words: &'a fst::Set) .tokens() .filter(|t| t.is_word()) .map(|t| t.word.to_string()) + .take(MAX_QUERY_LEN) .enumerate() .collect() } diff --git a/meilisearch-http/tests/search.rs b/meilisearch-http/tests/search.rs index 9da6b964e..13dc4c898 100644 --- a/meilisearch-http/tests/search.rs +++ b/meilisearch-http/tests/search.rs @@ -1945,3 +1945,32 @@ async fn test_filter_nb_hits_search_normal() { println!("result: {}", response); assert_eq!(response["nbHits"], 1); } + +#[actix_rt::test] +async fn test_max_word_query() { + use meilisearch_core::MAX_QUERY_LEN; + + let 
mut server = common::Server::with_uid("test"); + let body = json!({ + "uid": "test", + "primaryKey": "id", + }); + server.create_index(body).await; + let documents = json!([ + {"id": 1, "value": "1 2 3 4 5 6 7 8 9 10 11"}, + {"id": 2, "value": "1 2 3 4 5 6 7 8 9 10"}] + ); + server.add_or_update_multiple_documents(documents).await; + + // Checks that query words beyond MAX_QUERY_LEN are ignored: the query is MAX_QUERY_LEN copies of "1" followed by "11". + // Both documents contain "1" but only document 1 contains "11", so 2 hits means the trailing "11" was dropped, + // whereas the short query "1 11" (within the limit) is expected to return a single hit. + let query = (0..MAX_QUERY_LEN) + .map(|_| "1") + .chain(std::iter::once("11")) + .fold(String::new(), |s, w| s + " " + w); + let (response, _) = server.search_post(json!({"q": query})).await; + assert_eq!(response["nbHits"], 2); + let (response, _) = server.search_post(json!({"q": "1 11"})).await; + assert_eq!(response["nbHits"], 1); +}