1615: Integrate the query time sort feature r=Kerollmops a=Kerollmops

This pull request integrates the sort at query time feature that was implemented on the milli side https://github.com/meilisearch/milli/pull/320. It follows the specification file https://github.com/meilisearch/specifications/blob/develop/text/0055-sort.md.

A bunch of tests has been added to ensure that the search works correctly and that the settings are fine too!

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
bors[bot] 2021-08-26 14:09:38 +00:00 committed by GitHub
commit 96f72f009a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 161 additions and 37 deletions

17
Cargo.lock generated
View File

@ -1585,7 +1585,6 @@ dependencies = [
"fst",
"futures",
"futures-util",
"grenad",
"heed",
"hex",
"http",
@ -1676,8 +1675,8 @@ dependencies = [
[[package]]
name = "milli"
version = "0.10.2"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.10.2#879d5e8799836d93f8995810965b6797be4f69d1"
version = "0.11.0"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.11.0#c51bb6789cb3fbb6511138374b3443f9116a445c"
dependencies = [
"bstr",
"byteorder",
@ -1704,7 +1703,6 @@ dependencies = [
"pest 2.1.3 (git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67)",
"pest_derive",
"rayon",
"regex",
"roaring",
"serde",
"serde_json",
@ -1712,7 +1710,6 @@ dependencies = [
"smallstr",
"smallvec",
"tempfile",
"tinytemplate",
"uuid",
]
@ -2937,16 +2934,6 @@ dependencies = [
"syn 1.0.73",
]
[[package]]
name = "tinytemplate"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d3dc76004a03cec1c5932bca4cdc2e39aaa798e3f82363dd94f9adf6098c12f"
dependencies = [
"serde",
"serde_json",
]
[[package]]
name = "tinyvec"
version = "1.2.0"

View File

@ -63,6 +63,7 @@ pub enum Code {
Facet,
Filter,
Sort,
BadParameter,
BadRequest,
@ -116,6 +117,8 @@ impl Code {
Facet => ErrCode::invalid("invalid_facet", StatusCode::BAD_REQUEST),
// error related to filters
Filter => ErrCode::invalid("invalid_filter", StatusCode::BAD_REQUEST),
// error related to sorts
Sort => ErrCode::invalid("invalid_sort", StatusCode::BAD_REQUEST),
BadParameter => ErrCode::invalid("bad_parameter", StatusCode::BAD_REQUEST),
BadRequest => ErrCode::invalid("bad_request", StatusCode::BAD_REQUEST),

View File

@ -41,7 +41,6 @@ flate2 = "1.0.19"
fst = "0.4.5"
futures = "0.3.7"
futures-util = "0.3.8"
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
http = "0.2.1"
indexmap = { version = "1.3.2", features = ["serde-1"] }
@ -51,7 +50,7 @@ main_error = "0.1.0"
meilisearch-error = { path = "../meilisearch-error" }
meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" }
memmap = "0.7.0"
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.10.2" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.11.0" }
mime = "0.3.16"
num_cpus = "1.13.0"
once_cell = "1.5.2"

View File

@ -103,6 +103,7 @@ impl ErrorCode for MilliError<'_> {
UserError::PrimaryKeyCannotBeReset => Code::PrimaryKeyAlreadyPresent,
UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound,
UserError::InvalidFacetsDistribution { .. } => Code::BadRequest,
UserError::InvalidSortableAttribute { .. } => Code::Sort,
}
}
}

View File

@ -63,6 +63,8 @@ impl Index {
let filterable_attributes = self.filterable_fields(txn)?.into_iter().collect();
let sortable_attributes = self.sortable_fields(txn)?.into_iter().collect();
let criteria = self
.criteria(txn)?
.into_iter()
@ -101,6 +103,7 @@ impl Index {
None => Setting::Reset,
},
filterable_attributes: Setting::Set(filterable_attributes),
sortable_attributes: Setting::Set(sortable_attributes),
ranking_rules: Setting::Set(criteria),
stop_words: Setting::Set(stop_words),
distinct_attribute: match distinct_field {

View File

@ -1,15 +1,17 @@
use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::str::FromStr;
use std::time::Instant;
use either::Either;
use heed::RoTxn;
use indexmap::IndexMap;
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token};
use milli::{FieldId, FieldsIdsMap, FilterCondition, MatchingWords};
use milli::{AscDesc, FieldId, FieldsIdsMap, FilterCondition, MatchingWords};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use crate::index::error::FacetError;
use crate::index::IndexError;
use super::error::Result;
use super::Index;
@ -49,6 +51,7 @@ pub struct SearchQuery {
#[serde(default = "Default::default")]
pub matches: bool,
pub filter: Option<Value>,
pub sort: Option<Vec<String>>,
pub facets_distribution: Option<Vec<String>>,
}
@ -104,6 +107,15 @@ impl Index {
}
}
if let Some(ref sort) = query.sort {
let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(err) => return Err(IndexError::Milli(err.into())),
};
search.sort_criteria(sort);
}
let milli::SearchResult {
documents_ids,
matching_words,

View File

@ -1,8 +1,8 @@
use std::fs::File;
use crate::index::Index;
use grenad::CompressionType;
use milli::update::UpdateBuilder;
use milli::CompressionType;
use rayon::ThreadPool;
use crate::index_controller::UpdateMeta;

View File

@ -55,7 +55,9 @@ pub struct Settings<T> {
pub searchable_attributes: Setting<Vec<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub filterable_attributes: Setting<HashSet<String>>,
pub filterable_attributes: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub sortable_attributes: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub ranking_rules: Setting<Vec<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
@ -75,6 +77,7 @@ impl Settings<Checked> {
displayed_attributes: Setting::Reset,
searchable_attributes: Setting::Reset,
filterable_attributes: Setting::Reset,
sortable_attributes: Setting::Reset,
ranking_rules: Setting::Reset,
stop_words: Setting::Reset,
synonyms: Setting::Reset,
@ -88,6 +91,7 @@ impl Settings<Checked> {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
ranking_rules,
stop_words,
synonyms,
@ -99,6 +103,7 @@ impl Settings<Checked> {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
ranking_rules,
stop_words,
synonyms,
@ -136,6 +141,7 @@ impl Settings<Unchecked> {
displayed_attributes,
searchable_attributes,
filterable_attributes: self.filterable_attributes,
sortable_attributes: self.sortable_attributes,
ranking_rules: self.ranking_rules,
stop_words: self.stop_words,
synonyms: self.synonyms,
@ -248,8 +254,21 @@ impl Index {
}
match settings.filterable_attributes {
Setting::Set(ref facet_types) => builder.set_filterable_fields(facet_types.clone()),
Setting::Reset => builder.set_filterable_fields(HashSet::new()),
Setting::Set(ref facets) => {
builder.set_filterable_fields(facets.clone().into_iter().collect())
}
Setting::Reset => builder.reset_filterable_fields(),
Setting::NotSet => (),
}
match settings.sortable_attributes {
Setting::Set(ref fields) => {
builder.set_sortable_fields(fields.iter().cloned().collect())
}
Setting::Reset => {
// TODO we must use the reset_sortable_fields in a futur PR.
builder.set_sortable_fields(HashSet::new())
}
Setting::NotSet => (),
}
@ -328,6 +347,7 @@ mod test {
displayed_attributes: Setting::Set(vec![String::from("hello")]),
searchable_attributes: Setting::Set(vec![String::from("hello")]),
filterable_attributes: Setting::NotSet,
sortable_attributes: Setting::NotSet,
ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet,
synonyms: Setting::NotSet,
@ -348,6 +368,7 @@ mod test {
displayed_attributes: Setting::Set(vec![String::from("*")]),
searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")]),
filterable_attributes: Setting::NotSet,
sortable_attributes: Setting::NotSet,
ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet,
synonyms: Setting::NotSet,

View File

@ -158,15 +158,12 @@ impl From<Settings> for index_controller::Settings<Unchecked> {
Some(None) => Setting::Reset,
None => Setting::NotSet
},
// we previously had a `Vec<String>` but now we have a `HashMap<String, String>`
// representing the name of the faceted field + the type of the field. Since the type
// was not known in the V1 of the dump we are just going to assume everything is a
// String
filterable_attributes: match settings.attributes_for_faceting {
Some(Some(attrs)) => Setting::Set(attrs.into_iter().collect()),
Some(None) => Setting::Reset,
None => Setting::NotSet
},
sortable_attributes: Setting::NotSet,
// we need to convert the old `Vec<String>` into a `BTreeSet<String>`
ranking_rules: match settings.ranking_rules {
Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter(|criterion| {

View File

@ -4,7 +4,7 @@ use std::sync::Arc;
use std::{error, fs};
use byte_unit::Byte;
use grenad::CompressionType;
use milli::CompressionType;
use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use rustls::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth,

View File

@ -1,5 +1,3 @@
use std::collections::{BTreeSet, HashSet};
use actix_web::{web, HttpResponse};
use log::debug;
use serde::Deserialize;
@ -31,6 +29,7 @@ pub struct SearchQueryGet {
crop_length: usize,
attributes_to_highlight: Option<String>,
filter: Option<String>,
sort: Option<String>,
#[serde(default = "Default::default")]
matches: bool,
facets_distribution: Option<String>,
@ -40,19 +39,19 @@ impl From<SearchQueryGet> for SearchQuery {
fn from(other: SearchQueryGet) -> Self {
let attributes_to_retrieve = other
.attributes_to_retrieve
.map(|attrs| attrs.split(',').map(String::from).collect::<BTreeSet<_>>());
.map(|attrs| attrs.split(',').map(String::from).collect());
let attributes_to_crop = other
.attributes_to_crop
.map(|attrs| attrs.split(',').map(String::from).collect::<Vec<_>>());
.map(|attrs| attrs.split(',').map(String::from).collect());
let attributes_to_highlight = other
.attributes_to_highlight
.map(|attrs| attrs.split(',').map(String::from).collect::<HashSet<_>>());
.map(|attrs| attrs.split(',').map(String::from).collect());
let facets_distribution = other
.facets_distribution
.map(|attrs| attrs.split(',').map(String::from).collect::<Vec<_>>());
.map(|attrs| attrs.split(',').map(String::from).collect());
let filter = match other.filter {
Some(f) => match serde_json::from_str(&f) {
@ -62,6 +61,10 @@ impl From<SearchQueryGet> for SearchQuery {
None => None,
};
let sort = other
.sort
.map(|attrs| attrs.split(',').map(String::from).collect());
Self {
q: other.q,
offset: other.offset,
@ -71,6 +74,7 @@ impl From<SearchQueryGet> for SearchQuery {
crop_length: other.crop_length,
attributes_to_highlight,
filter,
sort,
matches: other.matches,
facets_distribution,
}

View File

@ -75,11 +75,18 @@ macro_rules! make_setting_route {
make_setting_route!(
"/filterable-attributes",
std::collections::HashSet<String>,
std::collections::BTreeSet<String>,
filterable_attributes,
"filterableAttributes"
);
make_setting_route!(
"/sortable-attributes",
std::collections::BTreeSet<String>,
sortable_attributes,
"sortableAttributes"
);
make_setting_route!(
"/displayed-attributes",
Vec<String>,
@ -132,6 +139,7 @@ macro_rules! generate_configure {
generate_configure!(
filterable_attributes,
sortable_attributes,
displayed_attributes,
searchable_attributes,
distinct_attribute,

View File

@ -146,6 +146,80 @@ async fn search_with_filter_array_notation() {
assert_eq!(response["hits"].as_array().unwrap().len(), 3);
}
#[actix_rt::test]
async fn search_with_sort_on_numbers() {
let server = Server::new().await;
let index = server.index("test");
index
.update_settings(json!({"sortableAttributes": ["id"]}))
.await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_update_id(1).await;
index
.search(
json!({
"sort": ["id:asc"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 5);
},
)
.await;
}
#[actix_rt::test]
async fn search_with_sort_on_strings() {
let server = Server::new().await;
let index = server.index("test");
index
.update_settings(json!({"sortableAttributes": ["title"]}))
.await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_update_id(1).await;
index
.search(
json!({
"sort": ["title:desc"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 5);
},
)
.await;
}
#[actix_rt::test]
async fn search_with_multiple_sort() {
let server = Server::new().await;
let index = server.index("test");
index
.update_settings(json!({"sortableAttributes": ["id", "title"]}))
.await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_update_id(1).await;
let (response, code) = index
.search_post(json!({
"sort": ["id:asc", "title:desc"]
}))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 5);
}
#[actix_rt::test]
async fn search_facet_distribution() {
let server = Server::new().await;

View File

@ -13,7 +13,14 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
map.insert("distinct_attribute", json!(Value::Null));
map.insert(
"ranking_rules",
json!(["words", "typo", "proximity", "attribute", "exactness"]),
json!([
"words",
"typo",
"sort",
"proximity",
"attribute",
"exactness"
]),
);
map.insert("stop_words", json!([]));
map.insert("synonyms", json!({}));
@ -35,14 +42,22 @@ async fn get_settings() {
let (response, code) = index.settings().await;
assert_eq!(code, 200);
let settings = response.as_object().unwrap();
assert_eq!(settings.keys().len(), 7);
assert_eq!(settings.keys().len(), 8);
assert_eq!(settings["displayedAttributes"], json!(["*"]));
assert_eq!(settings["searchableAttributes"], json!(["*"]));
assert_eq!(settings["filterableAttributes"], json!([]));
assert_eq!(settings["sortableAttributes"], json!([]));
assert_eq!(settings["distinctAttribute"], json!(null));
assert_eq!(
settings["rankingRules"],
json!(["words", "typo", "proximity", "attribute", "exactness"])
json!([
"words",
"typo",
"sort",
"proximity",
"attribute",
"exactness"
])
);
assert_eq!(settings["stopWords"], json!([]));
}