1615: Integrate the query time sort feature r=Kerollmops a=Kerollmops

This pull request integrates the sort at query time feature that was implemented on the milli side https://github.com/meilisearch/milli/pull/320. It follows the specification file https://github.com/meilisearch/specifications/blob/develop/text/0055-sort.md.

A bunch of tests has been added to ensure that the search works correctly and that the settings are fine too!

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
bors[bot] 2021-08-26 14:09:38 +00:00 committed by GitHub
commit 96f72f009a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 161 additions and 37 deletions

17
Cargo.lock generated
View File

@ -1585,7 +1585,6 @@ dependencies = [
"fst", "fst",
"futures", "futures",
"futures-util", "futures-util",
"grenad",
"heed", "heed",
"hex", "hex",
"http", "http",
@ -1676,8 +1675,8 @@ dependencies = [
[[package]] [[package]]
name = "milli" name = "milli"
version = "0.10.2" version = "0.11.0"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.10.2#879d5e8799836d93f8995810965b6797be4f69d1" source = "git+https://github.com/meilisearch/milli.git?tag=v0.11.0#c51bb6789cb3fbb6511138374b3443f9116a445c"
dependencies = [ dependencies = [
"bstr", "bstr",
"byteorder", "byteorder",
@ -1704,7 +1703,6 @@ dependencies = [
"pest 2.1.3 (git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67)", "pest 2.1.3 (git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67)",
"pest_derive", "pest_derive",
"rayon", "rayon",
"regex",
"roaring", "roaring",
"serde", "serde",
"serde_json", "serde_json",
@ -1712,7 +1710,6 @@ dependencies = [
"smallstr", "smallstr",
"smallvec", "smallvec",
"tempfile", "tempfile",
"tinytemplate",
"uuid", "uuid",
] ]
@ -2937,16 +2934,6 @@ dependencies = [
"syn 1.0.73", "syn 1.0.73",
] ]
[[package]]
name = "tinytemplate"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d3dc76004a03cec1c5932bca4cdc2e39aaa798e3f82363dd94f9adf6098c12f"
dependencies = [
"serde",
"serde_json",
]
[[package]] [[package]]
name = "tinyvec" name = "tinyvec"
version = "1.2.0" version = "1.2.0"

View File

@ -63,6 +63,7 @@ pub enum Code {
Facet, Facet,
Filter, Filter,
Sort,
BadParameter, BadParameter,
BadRequest, BadRequest,
@ -116,6 +117,8 @@ impl Code {
Facet => ErrCode::invalid("invalid_facet", StatusCode::BAD_REQUEST), Facet => ErrCode::invalid("invalid_facet", StatusCode::BAD_REQUEST),
// error related to filters // error related to filters
Filter => ErrCode::invalid("invalid_filter", StatusCode::BAD_REQUEST), Filter => ErrCode::invalid("invalid_filter", StatusCode::BAD_REQUEST),
// error related to sorts
Sort => ErrCode::invalid("invalid_sort", StatusCode::BAD_REQUEST),
BadParameter => ErrCode::invalid("bad_parameter", StatusCode::BAD_REQUEST), BadParameter => ErrCode::invalid("bad_parameter", StatusCode::BAD_REQUEST),
BadRequest => ErrCode::invalid("bad_request", StatusCode::BAD_REQUEST), BadRequest => ErrCode::invalid("bad_request", StatusCode::BAD_REQUEST),

View File

@ -41,7 +41,6 @@ flate2 = "1.0.19"
fst = "0.4.5" fst = "0.4.5"
futures = "0.3.7" futures = "0.3.7"
futures-util = "0.3.8" futures-util = "0.3.8"
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" } heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
http = "0.2.1" http = "0.2.1"
indexmap = { version = "1.3.2", features = ["serde-1"] } indexmap = { version = "1.3.2", features = ["serde-1"] }
@ -51,7 +50,7 @@ main_error = "0.1.0"
meilisearch-error = { path = "../meilisearch-error" } meilisearch-error = { path = "../meilisearch-error" }
meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" }
memmap = "0.7.0" memmap = "0.7.0"
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.10.2" } milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.11.0" }
mime = "0.3.16" mime = "0.3.16"
num_cpus = "1.13.0" num_cpus = "1.13.0"
once_cell = "1.5.2" once_cell = "1.5.2"

View File

@ -103,6 +103,7 @@ impl ErrorCode for MilliError<'_> {
UserError::PrimaryKeyCannotBeReset => Code::PrimaryKeyAlreadyPresent, UserError::PrimaryKeyCannotBeReset => Code::PrimaryKeyAlreadyPresent,
UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound, UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound,
UserError::InvalidFacetsDistribution { .. } => Code::BadRequest, UserError::InvalidFacetsDistribution { .. } => Code::BadRequest,
UserError::InvalidSortableAttribute { .. } => Code::Sort,
} }
} }
} }

View File

@ -63,6 +63,8 @@ impl Index {
let filterable_attributes = self.filterable_fields(txn)?.into_iter().collect(); let filterable_attributes = self.filterable_fields(txn)?.into_iter().collect();
let sortable_attributes = self.sortable_fields(txn)?.into_iter().collect();
let criteria = self let criteria = self
.criteria(txn)? .criteria(txn)?
.into_iter() .into_iter()
@ -101,6 +103,7 @@ impl Index {
None => Setting::Reset, None => Setting::Reset,
}, },
filterable_attributes: Setting::Set(filterable_attributes), filterable_attributes: Setting::Set(filterable_attributes),
sortable_attributes: Setting::Set(sortable_attributes),
ranking_rules: Setting::Set(criteria), ranking_rules: Setting::Set(criteria),
stop_words: Setting::Set(stop_words), stop_words: Setting::Set(stop_words),
distinct_attribute: match distinct_field { distinct_attribute: match distinct_field {

View File

@ -1,15 +1,17 @@
use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::str::FromStr;
use std::time::Instant; use std::time::Instant;
use either::Either; use either::Either;
use heed::RoTxn; use heed::RoTxn;
use indexmap::IndexMap; use indexmap::IndexMap;
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token}; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token};
use milli::{FieldId, FieldsIdsMap, FilterCondition, MatchingWords}; use milli::{AscDesc, FieldId, FieldsIdsMap, FilterCondition, MatchingWords};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::Value; use serde_json::Value;
use crate::index::error::FacetError; use crate::index::error::FacetError;
use crate::index::IndexError;
use super::error::Result; use super::error::Result;
use super::Index; use super::Index;
@ -49,6 +51,7 @@ pub struct SearchQuery {
#[serde(default = "Default::default")] #[serde(default = "Default::default")]
pub matches: bool, pub matches: bool,
pub filter: Option<Value>, pub filter: Option<Value>,
pub sort: Option<Vec<String>>,
pub facets_distribution: Option<Vec<String>>, pub facets_distribution: Option<Vec<String>>,
} }
@ -104,6 +107,15 @@ impl Index {
} }
} }
if let Some(ref sort) = query.sort {
let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(err) => return Err(IndexError::Milli(err.into())),
};
search.sort_criteria(sort);
}
let milli::SearchResult { let milli::SearchResult {
documents_ids, documents_ids,
matching_words, matching_words,

View File

@ -1,8 +1,8 @@
use std::fs::File; use std::fs::File;
use crate::index::Index; use crate::index::Index;
use grenad::CompressionType;
use milli::update::UpdateBuilder; use milli::update::UpdateBuilder;
use milli::CompressionType;
use rayon::ThreadPool; use rayon::ThreadPool;
use crate::index_controller::UpdateMeta; use crate::index_controller::UpdateMeta;

View File

@ -55,7 +55,9 @@ pub struct Settings<T> {
pub searchable_attributes: Setting<Vec<String>>, pub searchable_attributes: Setting<Vec<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub filterable_attributes: Setting<HashSet<String>>, pub filterable_attributes: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub sortable_attributes: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub ranking_rules: Setting<Vec<String>>, pub ranking_rules: Setting<Vec<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[serde(default, skip_serializing_if = "Setting::is_not_set")]
@ -75,6 +77,7 @@ impl Settings<Checked> {
displayed_attributes: Setting::Reset, displayed_attributes: Setting::Reset,
searchable_attributes: Setting::Reset, searchable_attributes: Setting::Reset,
filterable_attributes: Setting::Reset, filterable_attributes: Setting::Reset,
sortable_attributes: Setting::Reset,
ranking_rules: Setting::Reset, ranking_rules: Setting::Reset,
stop_words: Setting::Reset, stop_words: Setting::Reset,
synonyms: Setting::Reset, synonyms: Setting::Reset,
@ -88,6 +91,7 @@ impl Settings<Checked> {
displayed_attributes, displayed_attributes,
searchable_attributes, searchable_attributes,
filterable_attributes, filterable_attributes,
sortable_attributes,
ranking_rules, ranking_rules,
stop_words, stop_words,
synonyms, synonyms,
@ -99,6 +103,7 @@ impl Settings<Checked> {
displayed_attributes, displayed_attributes,
searchable_attributes, searchable_attributes,
filterable_attributes, filterable_attributes,
sortable_attributes,
ranking_rules, ranking_rules,
stop_words, stop_words,
synonyms, synonyms,
@ -136,6 +141,7 @@ impl Settings<Unchecked> {
displayed_attributes, displayed_attributes,
searchable_attributes, searchable_attributes,
filterable_attributes: self.filterable_attributes, filterable_attributes: self.filterable_attributes,
sortable_attributes: self.sortable_attributes,
ranking_rules: self.ranking_rules, ranking_rules: self.ranking_rules,
stop_words: self.stop_words, stop_words: self.stop_words,
synonyms: self.synonyms, synonyms: self.synonyms,
@ -248,8 +254,21 @@ impl Index {
} }
match settings.filterable_attributes { match settings.filterable_attributes {
Setting::Set(ref facet_types) => builder.set_filterable_fields(facet_types.clone()), Setting::Set(ref facets) => {
Setting::Reset => builder.set_filterable_fields(HashSet::new()), builder.set_filterable_fields(facets.clone().into_iter().collect())
}
Setting::Reset => builder.reset_filterable_fields(),
Setting::NotSet => (),
}
match settings.sortable_attributes {
Setting::Set(ref fields) => {
builder.set_sortable_fields(fields.iter().cloned().collect())
}
Setting::Reset => {
// TODO we must use the reset_sortable_fields in a futur PR.
builder.set_sortable_fields(HashSet::new())
}
Setting::NotSet => (), Setting::NotSet => (),
} }
@ -328,6 +347,7 @@ mod test {
displayed_attributes: Setting::Set(vec![String::from("hello")]), displayed_attributes: Setting::Set(vec![String::from("hello")]),
searchable_attributes: Setting::Set(vec![String::from("hello")]), searchable_attributes: Setting::Set(vec![String::from("hello")]),
filterable_attributes: Setting::NotSet, filterable_attributes: Setting::NotSet,
sortable_attributes: Setting::NotSet,
ranking_rules: Setting::NotSet, ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet, stop_words: Setting::NotSet,
synonyms: Setting::NotSet, synonyms: Setting::NotSet,
@ -348,6 +368,7 @@ mod test {
displayed_attributes: Setting::Set(vec![String::from("*")]), displayed_attributes: Setting::Set(vec![String::from("*")]),
searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")]), searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")]),
filterable_attributes: Setting::NotSet, filterable_attributes: Setting::NotSet,
sortable_attributes: Setting::NotSet,
ranking_rules: Setting::NotSet, ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet, stop_words: Setting::NotSet,
synonyms: Setting::NotSet, synonyms: Setting::NotSet,

View File

@ -158,15 +158,12 @@ impl From<Settings> for index_controller::Settings<Unchecked> {
Some(None) => Setting::Reset, Some(None) => Setting::Reset,
None => Setting::NotSet None => Setting::NotSet
}, },
// we previously had a `Vec<String>` but now we have a `HashMap<String, String>`
// representing the name of the faceted field + the type of the field. Since the type
// was not known in the V1 of the dump we are just going to assume everything is a
// String
filterable_attributes: match settings.attributes_for_faceting { filterable_attributes: match settings.attributes_for_faceting {
Some(Some(attrs)) => Setting::Set(attrs.into_iter().collect()), Some(Some(attrs)) => Setting::Set(attrs.into_iter().collect()),
Some(None) => Setting::Reset, Some(None) => Setting::Reset,
None => Setting::NotSet None => Setting::NotSet
}, },
sortable_attributes: Setting::NotSet,
// we need to convert the old `Vec<String>` into a `BTreeSet<String>` // we need to convert the old `Vec<String>` into a `BTreeSet<String>`
ranking_rules: match settings.ranking_rules { ranking_rules: match settings.ranking_rules {
Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter(|criterion| { Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter(|criterion| {

View File

@ -4,7 +4,7 @@ use std::sync::Arc;
use std::{error, fs}; use std::{error, fs};
use byte_unit::Byte; use byte_unit::Byte;
use grenad::CompressionType; use milli::CompressionType;
use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys}; use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use rustls::{ use rustls::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth, AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth,

View File

@ -1,5 +1,3 @@
use std::collections::{BTreeSet, HashSet};
use actix_web::{web, HttpResponse}; use actix_web::{web, HttpResponse};
use log::debug; use log::debug;
use serde::Deserialize; use serde::Deserialize;
@ -31,6 +29,7 @@ pub struct SearchQueryGet {
crop_length: usize, crop_length: usize,
attributes_to_highlight: Option<String>, attributes_to_highlight: Option<String>,
filter: Option<String>, filter: Option<String>,
sort: Option<String>,
#[serde(default = "Default::default")] #[serde(default = "Default::default")]
matches: bool, matches: bool,
facets_distribution: Option<String>, facets_distribution: Option<String>,
@ -40,19 +39,19 @@ impl From<SearchQueryGet> for SearchQuery {
fn from(other: SearchQueryGet) -> Self { fn from(other: SearchQueryGet) -> Self {
let attributes_to_retrieve = other let attributes_to_retrieve = other
.attributes_to_retrieve .attributes_to_retrieve
.map(|attrs| attrs.split(',').map(String::from).collect::<BTreeSet<_>>()); .map(|attrs| attrs.split(',').map(String::from).collect());
let attributes_to_crop = other let attributes_to_crop = other
.attributes_to_crop .attributes_to_crop
.map(|attrs| attrs.split(',').map(String::from).collect::<Vec<_>>()); .map(|attrs| attrs.split(',').map(String::from).collect());
let attributes_to_highlight = other let attributes_to_highlight = other
.attributes_to_highlight .attributes_to_highlight
.map(|attrs| attrs.split(',').map(String::from).collect::<HashSet<_>>()); .map(|attrs| attrs.split(',').map(String::from).collect());
let facets_distribution = other let facets_distribution = other
.facets_distribution .facets_distribution
.map(|attrs| attrs.split(',').map(String::from).collect::<Vec<_>>()); .map(|attrs| attrs.split(',').map(String::from).collect());
let filter = match other.filter { let filter = match other.filter {
Some(f) => match serde_json::from_str(&f) { Some(f) => match serde_json::from_str(&f) {
@ -62,6 +61,10 @@ impl From<SearchQueryGet> for SearchQuery {
None => None, None => None,
}; };
let sort = other
.sort
.map(|attrs| attrs.split(',').map(String::from).collect());
Self { Self {
q: other.q, q: other.q,
offset: other.offset, offset: other.offset,
@ -71,6 +74,7 @@ impl From<SearchQueryGet> for SearchQuery {
crop_length: other.crop_length, crop_length: other.crop_length,
attributes_to_highlight, attributes_to_highlight,
filter, filter,
sort,
matches: other.matches, matches: other.matches,
facets_distribution, facets_distribution,
} }

View File

@ -75,11 +75,18 @@ macro_rules! make_setting_route {
make_setting_route!( make_setting_route!(
"/filterable-attributes", "/filterable-attributes",
std::collections::HashSet<String>, std::collections::BTreeSet<String>,
filterable_attributes, filterable_attributes,
"filterableAttributes" "filterableAttributes"
); );
make_setting_route!(
"/sortable-attributes",
std::collections::BTreeSet<String>,
sortable_attributes,
"sortableAttributes"
);
make_setting_route!( make_setting_route!(
"/displayed-attributes", "/displayed-attributes",
Vec<String>, Vec<String>,
@ -132,6 +139,7 @@ macro_rules! generate_configure {
generate_configure!( generate_configure!(
filterable_attributes, filterable_attributes,
sortable_attributes,
displayed_attributes, displayed_attributes,
searchable_attributes, searchable_attributes,
distinct_attribute, distinct_attribute,

View File

@ -146,6 +146,80 @@ async fn search_with_filter_array_notation() {
assert_eq!(response["hits"].as_array().unwrap().len(), 3); assert_eq!(response["hits"].as_array().unwrap().len(), 3);
} }
#[actix_rt::test]
async fn search_with_sort_on_numbers() {
let server = Server::new().await;
let index = server.index("test");
index
.update_settings(json!({"sortableAttributes": ["id"]}))
.await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_update_id(1).await;
index
.search(
json!({
"sort": ["id:asc"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 5);
},
)
.await;
}
#[actix_rt::test]
async fn search_with_sort_on_strings() {
let server = Server::new().await;
let index = server.index("test");
index
.update_settings(json!({"sortableAttributes": ["title"]}))
.await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_update_id(1).await;
index
.search(
json!({
"sort": ["title:desc"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 5);
},
)
.await;
}
#[actix_rt::test]
async fn search_with_multiple_sort() {
let server = Server::new().await;
let index = server.index("test");
index
.update_settings(json!({"sortableAttributes": ["id", "title"]}))
.await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_update_id(1).await;
let (response, code) = index
.search_post(json!({
"sort": ["id:asc", "title:desc"]
}))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 5);
}
#[actix_rt::test] #[actix_rt::test]
async fn search_facet_distribution() { async fn search_facet_distribution() {
let server = Server::new().await; let server = Server::new().await;

View File

@ -13,7 +13,14 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
map.insert("distinct_attribute", json!(Value::Null)); map.insert("distinct_attribute", json!(Value::Null));
map.insert( map.insert(
"ranking_rules", "ranking_rules",
json!(["words", "typo", "proximity", "attribute", "exactness"]), json!([
"words",
"typo",
"sort",
"proximity",
"attribute",
"exactness"
]),
); );
map.insert("stop_words", json!([])); map.insert("stop_words", json!([]));
map.insert("synonyms", json!({})); map.insert("synonyms", json!({}));
@ -35,14 +42,22 @@ async fn get_settings() {
let (response, code) = index.settings().await; let (response, code) = index.settings().await;
assert_eq!(code, 200); assert_eq!(code, 200);
let settings = response.as_object().unwrap(); let settings = response.as_object().unwrap();
assert_eq!(settings.keys().len(), 7); assert_eq!(settings.keys().len(), 8);
assert_eq!(settings["displayedAttributes"], json!(["*"])); assert_eq!(settings["displayedAttributes"], json!(["*"]));
assert_eq!(settings["searchableAttributes"], json!(["*"])); assert_eq!(settings["searchableAttributes"], json!(["*"]));
assert_eq!(settings["filterableAttributes"], json!([])); assert_eq!(settings["filterableAttributes"], json!([]));
assert_eq!(settings["sortableAttributes"], json!([]));
assert_eq!(settings["distinctAttribute"], json!(null)); assert_eq!(settings["distinctAttribute"], json!(null));
assert_eq!( assert_eq!(
settings["rankingRules"], settings["rankingRules"],
json!(["words", "typo", "proximity", "attribute", "exactness"]) json!([
"words",
"typo",
"sort",
"proximity",
"attribute",
"exactness"
])
); );
assert_eq!(settings["stopWords"], json!([])); assert_eq!(settings["stopWords"], json!([]));
} }