diff --git a/Cargo.lock b/Cargo.lock index 08914344d..afdf18a9b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1754,7 +1754,7 @@ dependencies = [ [[package]] name = "meilisearch-tokenizer" version = "0.1.1" -source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.1.2#8d91cd52f30aa4b651a085c15056938f7b599646" +source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.1.3#d3fe5311a66c1f31682a297df8a8b6b8916f4252" dependencies = [ "character_converter", "cow-utils", diff --git a/LICENSE b/LICENSE index 8f028e3f2..03ad189ef 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2019-2020 Meili SAS +Copyright (c) 2019-2021 Meili SAS Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index f37d27abd..e8b2ec9e3 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ For more information about features go to [our documentation](https://docs.meili ### Deploy the Server -#### Brew (Mac OS) +#### Homebrew (Mac OS) ```bash brew update && brew install meilisearch diff --git a/meilisearch-core/Cargo.toml b/meilisearch-core/Cargo.toml index 7d930096c..3fe030e9f 100644 --- a/meilisearch-core/Cargo.toml +++ b/meilisearch-core/Cargo.toml @@ -26,7 +26,7 @@ levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } log = "0.4.11" meilisearch-error = { path = "../meilisearch-error", version = "0.18.1" } meilisearch-schema = { path = "../meilisearch-schema", version = "0.18.1" } -meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.1.2" } +meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.1.3" } meilisearch-types = { path = "../meilisearch-types", version = "0.18.1" } once_cell = "1.5.2" ordered-float = { version = "2.0.1", features = ["serde"] } diff --git a/meilisearch-core/src/lib.rs b/meilisearch-core/src/lib.rs 
index bcdad62b1..947ad5fb7 100644 --- a/meilisearch-core/src/lib.rs +++ b/meilisearch-core/src/lib.rs @@ -39,6 +39,7 @@ pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus pub use meilisearch_types::{DocIndex, DocumentId, Highlight}; pub use meilisearch_schema::Schema; pub use query_words_mapper::QueryWordsMapper; +pub use query_tree::MAX_QUERY_LEN; use compact_arena::SmallArena; use log::{error, trace}; diff --git a/meilisearch-core/src/query_tree.rs b/meilisearch-core/src/query_tree.rs index c2f43818f..5d10e9bef 100644 --- a/meilisearch-core/src/query_tree.rs +++ b/meilisearch-core/src/query_tree.rs @@ -16,6 +16,8 @@ use crate::{store, DocumentId, DocIndex, MResult, FstSetCow}; use crate::automaton::{build_dfa, build_prefix_dfa, build_exact_dfa}; use crate::QueryWordsMapper; +pub const MAX_QUERY_LEN: usize = 10; + #[derive(Clone, PartialEq, Eq, Hash)] pub enum Operation { And(Vec), @@ -181,6 +183,7 @@ fn split_query_string<'a, A: AsRef<[u8]>>(s: &str, stop_words: &'a fst::Set) .tokens() .filter(|t| t.is_word()) .map(|t| t.word.to_string()) + .take(MAX_QUERY_LEN) .enumerate() .collect() } diff --git a/meilisearch-http/tests/search.rs b/meilisearch-http/tests/search.rs index 9da6b964e..13dc4c898 100644 --- a/meilisearch-http/tests/search.rs +++ b/meilisearch-http/tests/search.rs @@ -1945,3 +1945,32 @@ async fn test_filter_nb_hits_search_normal() { println!("result: {}", response); assert_eq!(response["nbHits"], 1); } + +#[actix_rt::test] +async fn test_max_word_query() { + use meilisearch_core::MAX_QUERY_LEN; + + let mut server = common::Server::with_uid("test"); + let body = json!({ + "uid": "test", + "primaryKey": "id", + }); + server.create_index(body).await; + let documents = json!([ + {"id": 1, "value": "1 2 3 4 5 6 7 8 9 10 11"}, + {"id": 2, "value": "1 2 3 4 5 6 7 8 9 10"}] + ); + server.add_or_update_multiple_documents(documents).await; + + // We want to create a request where the 11 will be ignored. 
We have 2 documents: a query + // containing only the word 1 should return both, but a query with 1 and 11 should return only the first. + // This is how we know that the excess query words have been ignored. + let query = (0..MAX_QUERY_LEN) + .map(|_| "1") + .chain(std::iter::once("11")) + .fold(String::new(), |s, w| s + " " + w); + let (response, _) = server.search_post(json!({"q": query})).await; + assert_eq!(response["nbHits"], 2); + let (response, _) = server.search_post(json!({"q": "1 11"})).await; + assert_eq!(response["nbHits"], 1); +}