1174: Limit query words number r=MarinPostma a=MarinPostma

This PR adds a limit to the number of words taken into account in a search query. Query strings that are too long lead to huge performance hits and resource consumption, which occasionally crash the machine. The limit has been hard-coded to 10, and tests have been added to make sure that it is taken into account.

close #941

Co-authored-by: mpostma <postma.marin@protonmail.com>
This commit is contained in:
bors[bot] 2021-01-28 17:38:34 +00:00 committed by GitHub
commit f37a420a04
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 33 additions and 0 deletions

View File

@ -39,6 +39,7 @@ pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus
pub use meilisearch_types::{DocIndex, DocumentId, Highlight};
pub use meilisearch_schema::Schema;
pub use query_words_mapper::QueryWordsMapper;
pub use query_tree::MAX_QUERY_LEN;
use compact_arena::SmallArena;
use log::{error, trace};

View File

@ -16,6 +16,8 @@ use crate::{store, DocumentId, DocIndex, MResult, FstSetCow};
use crate::automaton::{build_dfa, build_prefix_dfa, build_exact_dfa};
use crate::QueryWordsMapper;
/// Maximum number of word tokens kept when a query string is split into words;
/// any words past this count are dropped from the query.
pub const MAX_QUERY_LEN: usize = 10;
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum Operation {
And(Vec<Operation>),
@ -181,6 +183,7 @@ fn split_query_string<'a, A: AsRef<[u8]>>(s: &str, stop_words: &'a fst::Set<A>)
.tokens()
.filter(|t| t.is_word())
.map(|t| t.word.to_string())
.take(MAX_QUERY_LEN)
.enumerate()
.collect()
}

View File

@ -1945,3 +1945,32 @@ async fn test_filter_nb_hits_search_normal() {
println!("result: {}", response);
assert_eq!(response["nbHits"], 1);
}
#[actix_rt::test]
async fn test_max_word_query() {
    // Verifies that query words beyond `MAX_QUERY_LEN` are not taken into account.
    use meilisearch_core::MAX_QUERY_LEN;

    let mut server = common::Server::with_uid("test");
    server
        .create_index(json!({
            "uid": "test",
            "primaryKey": "id",
        }))
        .await;

    // Only document 1 contains the word "11"; both documents contain "1".
    server
        .add_or_update_multiple_documents(json!([
            {"id": 1, "value": "1 2 3 4 5 6 7 8 9 10 11"},
            {"id": 2, "value": "1 2 3 4 5 6 7 8 9 10"}]
        ))
        .await;

    // Fill the whole word budget with "1" so that the trailing "11" lands just
    // past the limit. If the extra word is dropped, the query behaves like a
    // plain "1" query and matches both documents; if it were kept, only
    // document 1 would match.
    let mut overlong_query = " 1".repeat(MAX_QUERY_LEN);
    overlong_query.push_str(" 11");

    let (truncated, _) = server.search_post(json!({"q": overlong_query})).await;
    assert_eq!(truncated["nbHits"], 2);

    // Control: a short query where "11" is within the limit matches only document 1.
    let (within_limit, _) = server.search_post(json!({"q": "1 11"})).await;
    assert_eq!(within_limit["nbHits"], 1);
}