add word limit to search queries

This commit is contained in:
mpostma 2021-01-08 16:23:23 +01:00
parent fa40c6e3d4
commit 81f343a46a
No known key found for this signature in database
GPG Key ID: CBC8A7C1D7A28C3A
3 changed files with 33 additions and 0 deletions

View File

@ -39,6 +39,7 @@ pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus
pub use meilisearch_types::{DocIndex, DocumentId, Highlight}; pub use meilisearch_types::{DocIndex, DocumentId, Highlight};
pub use meilisearch_schema::Schema; pub use meilisearch_schema::Schema;
pub use query_words_mapper::QueryWordsMapper; pub use query_words_mapper::QueryWordsMapper;
pub use query_tree::MAX_QUERY_LEN;
use compact_arena::SmallArena; use compact_arena::SmallArena;
use log::{error, trace}; use log::{error, trace};

View File

@ -16,6 +16,8 @@ use crate::{store, DocumentId, DocIndex, MResult, FstSetCow};
use crate::automaton::{build_dfa, build_prefix_dfa, build_exact_dfa}; use crate::automaton::{build_dfa, build_prefix_dfa, build_exact_dfa};
use crate::QueryWordsMapper; use crate::QueryWordsMapper;
/// Maximum number of words kept from a search query; `split_query_string`
/// truncates its word list to this length, so any further words are ignored.
pub const MAX_QUERY_LEN: usize = 10;
#[derive(Clone, PartialEq, Eq, Hash)] #[derive(Clone, PartialEq, Eq, Hash)]
pub enum Operation { pub enum Operation {
And(Vec<Operation>), And(Vec<Operation>),
@ -181,6 +183,7 @@ fn split_query_string<'a, A: AsRef<[u8]>>(s: &str, stop_words: &'a fst::Set<A>)
.tokens() .tokens()
.filter(|t| t.is_word()) .filter(|t| t.is_word())
.map(|t| t.word.to_string()) .map(|t| t.word.to_string())
.take(MAX_QUERY_LEN)
.enumerate() .enumerate()
.collect() .collect()
} }

View File

@ -1945,3 +1945,32 @@ async fn test_filter_nb_hits_search_normal() {
println!("result: {}", response); println!("result: {}", response);
assert_eq!(response["nbHits"], 1); assert_eq!(response["nbHits"], 1);
} }
/// Checks that query words beyond `MAX_QUERY_LEN` are not taken into account
/// by the search.
#[actix_rt::test]
async fn test_max_word_query() {
    use meilisearch_core::MAX_QUERY_LEN;

    let mut server = common::Server::with_uid("test");
    let index_config = json!({
        "uid": "test",
        "primaryKey": "id",
    });
    server.create_index(index_config).await;

    // Both documents contain the word `1`; only document 1 contains `11`.
    let docs = json!([
        {"id": 1, "value": "1 2 3 4 5 6 7 8 9 10 11"},
        {"id": 2, "value": "1 2 3 4 5 6 7 8 9 10"}
    ]);
    server.add_or_update_multiple_documents(docs).await;

    // Build a query made of `MAX_QUERY_LEN` copies of `1` followed by a single
    // `11`, each word preceded by a space. The trailing `11` sits just past the
    // word limit, so it is expected to be dropped: the query then behaves like
    // a search for `1` alone and matches both documents.
    let mut query = String::new();
    let words = std::iter::repeat("1")
        .take(MAX_QUERY_LEN)
        .chain(std::iter::once("11"));
    for word in words {
        query.push(' ');
        query.push_str(word);
    }

    let (capped_response, _) = server.search_post(json!({"q": query})).await;
    assert_eq!(capped_response["nbHits"], 2);

    // Control query: when `11` is within the limit it does restrict the
    // results, so only one document is returned.
    let (control_response, _) = server.search_post(json!({"q": "1 11"})).await;
    assert_eq!(control_response["nbHits"], 1);
}