mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Merge #338
338: Fix string fields sorting r=Kerollmops a=shekhirin Resolves https://github.com/meilisearch/milli/issues/333 <details> <summary>curl checks</summary> ```console ➜ ~ curl -s 'localhost:7700/indexes/movies/search' -d '{"sort": ["title:asc"], "limit": 30}' | jq -r '.hits | map(.title)[]' #1 Cheerleader Camp #Horror #RealityHigh #Roxy #SquadGoals $5 a Day $9.99 '71 (2) (500) Days of Summer (Girl)Friend *batteries not included ...And God Created Woman ...And Justice for All ...E fuori nevica! .45 1 1 Mile To You 1 Night 10 10 Cloverfield Lane 10 giorni senza mamma 10 Items or Less 10 Rillington Place 10 Rules for Sleeping Around 10 Things I Hate About You 10 to Midnight 10 Years 10,000 BC 10,000 Saints ➜ ~ curl -s 'localhost:7700/indexes/movies/search' -d '{"sort": ["title:desc"], "limit": 30}' | jq -r '.hits | map(.title)[]' 크게 될 놈 왓칭 뷰티플 마인드 노무현과 바보들 ハニー Счастье – это… Часть 2 СОТКА Смотри мою любовь Позивний 'Бандерас' Лошо момиче Күлүк Хомус Куда течет море Каникулы президента Ακίνητο Ποτάμι Üç Harfliler: Beddua È nata una Star? Æon Flux Ága À propos de Nice À Nos Amours À l'aventure ¡Three Amigos! Zulu Dawn Zulu Zulu Zu: Warriors from the Magic Mountain Zu Warriors Zorro Zorba the Greek Zootopia ``` </details> Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com>
This commit is contained in:
commit
446ed17589
@ -243,6 +243,7 @@ impl<'t> Iterator for FacetStringGroupRevRange<'t> {
|
|||||||
type Item = heed::Result<((NonZeroU8, u32, u32), (Option<(&'t str, &'t str)>, RoaringBitmap))>;
|
type Item = heed::Result<((NonZeroU8, u32, u32), (Option<(&'t str, &'t str)>, RoaringBitmap))>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
loop {
|
||||||
match self.iter.next() {
|
match self.iter.next() {
|
||||||
Some(Ok(((_fid, level, left, right), docids))) => {
|
Some(Ok(((_fid, level, left, right), docids))) => {
|
||||||
let must_be_returned = match self.end {
|
let must_be_returned = match self.end {
|
||||||
@ -252,15 +253,17 @@ impl<'t> Iterator for FacetStringGroupRevRange<'t> {
|
|||||||
};
|
};
|
||||||
if must_be_returned {
|
if must_be_returned {
|
||||||
match docids.decode() {
|
match docids.decode() {
|
||||||
Ok((bounds, docids)) => Some(Ok(((level, left, right), (bounds, docids)))),
|
Ok((bounds, docids)) => {
|
||||||
Err(e) => Some(Err(e)),
|
return Some(Ok(((level, left, right), (bounds, docids))))
|
||||||
}
|
}
|
||||||
} else {
|
Err(e) => return Some(Err(e)),
|
||||||
None
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(Err(e)) => Some(Err(e)),
|
continue;
|
||||||
None => None,
|
}
|
||||||
|
Some(Err(e)) => return Some(Err(e)),
|
||||||
|
None => return None,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -545,18 +548,18 @@ impl<'t> Iterator for FacetStringIter<'t> {
|
|||||||
// the algorithm less complex to understand.
|
// the algorithm less complex to understand.
|
||||||
let last = match last {
|
let last = match last {
|
||||||
Left(ascending) => match ascending {
|
Left(ascending) => match ascending {
|
||||||
Left(last) => Left(Left(last)),
|
Left(group) => Left(Left(group)),
|
||||||
Right(last) => Right(Left(last)),
|
Right(zero_level) => Right(Left(zero_level)),
|
||||||
},
|
},
|
||||||
Right(descending) => match descending {
|
Right(descending) => match descending {
|
||||||
Left(last) => Left(Right(last)),
|
Left(group) => Left(Right(group)),
|
||||||
Right(last) => Right(Right(last)),
|
Right(zero_level) => Right(Right(zero_level)),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
match last {
|
match last {
|
||||||
Left(last) => {
|
Left(group) => {
|
||||||
for result in last {
|
for result in group {
|
||||||
match result {
|
match result {
|
||||||
Ok(((level, left, right), (string_bounds, mut docids))) => {
|
Ok(((level, left, right), (string_bounds, mut docids))) => {
|
||||||
docids &= &*documents_ids;
|
docids &= &*documents_ids;
|
||||||
@ -566,6 +569,27 @@ impl<'t> Iterator for FacetStringIter<'t> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let result = if is_ascending {
|
let result = if is_ascending {
|
||||||
|
match string_bounds {
|
||||||
|
Some((left, right)) => FacetStringLevelZeroRange::new(
|
||||||
|
self.rtxn,
|
||||||
|
self.db,
|
||||||
|
self.field_id,
|
||||||
|
Included(left),
|
||||||
|
Included(right),
|
||||||
|
)
|
||||||
|
.map(Right),
|
||||||
|
None => FacetStringGroupRange::new(
|
||||||
|
self.rtxn,
|
||||||
|
self.db,
|
||||||
|
self.field_id,
|
||||||
|
NonZeroU8::new(level.get() - 1).unwrap(),
|
||||||
|
Included(left),
|
||||||
|
Included(right),
|
||||||
|
)
|
||||||
|
.map(Left),
|
||||||
|
}
|
||||||
|
.map(Left)
|
||||||
|
} else {
|
||||||
match string_bounds {
|
match string_bounds {
|
||||||
Some((left, right)) => {
|
Some((left, right)) => {
|
||||||
FacetStringLevelZeroRevRange::new(
|
FacetStringLevelZeroRevRange::new(
|
||||||
@ -588,27 +612,6 @@ impl<'t> Iterator for FacetStringIter<'t> {
|
|||||||
.map(Left),
|
.map(Left),
|
||||||
}
|
}
|
||||||
.map(Right)
|
.map(Right)
|
||||||
} else {
|
|
||||||
match string_bounds {
|
|
||||||
Some((left, right)) => FacetStringLevelZeroRange::new(
|
|
||||||
self.rtxn,
|
|
||||||
self.db,
|
|
||||||
self.field_id,
|
|
||||||
Included(left),
|
|
||||||
Included(right),
|
|
||||||
)
|
|
||||||
.map(Right),
|
|
||||||
None => FacetStringGroupRange::new(
|
|
||||||
self.rtxn,
|
|
||||||
self.db,
|
|
||||||
self.field_id,
|
|
||||||
NonZeroU8::new(level.get() - 1).unwrap(),
|
|
||||||
Included(left),
|
|
||||||
Included(right),
|
|
||||||
)
|
|
||||||
.map(Left),
|
|
||||||
}
|
|
||||||
.map(Left)
|
|
||||||
};
|
};
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
@ -624,9 +627,9 @@ impl<'t> Iterator for FacetStringIter<'t> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Right(last) => {
|
Right(zero_level) => {
|
||||||
// level zero only
|
// level zero only
|
||||||
for result in last {
|
for result in zero_level {
|
||||||
match result {
|
match result {
|
||||||
Ok((normalized, original, mut docids)) => {
|
Ok((normalized, original, mut docids)) => {
|
||||||
docids &= &*documents_ids;
|
docids &= &*documents_ids;
|
||||||
|
@ -1,6 +1,12 @@
|
|||||||
|
use std::cmp::Reverse;
|
||||||
|
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
use milli::update::Settings;
|
use heed::EnvOpenOptions;
|
||||||
use milli::{AscDesc, Criterion, Search, SearchResult};
|
use itertools::Itertools;
|
||||||
|
use maplit::hashset;
|
||||||
|
use milli::update::{Settings, UpdateBuilder, UpdateFormat};
|
||||||
|
use milli::{AscDesc, Criterion, Index, Search, SearchResult};
|
||||||
|
use rand::Rng;
|
||||||
use Criterion::*;
|
use Criterion::*;
|
||||||
|
|
||||||
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
||||||
@ -9,6 +15,7 @@ const ALLOW_TYPOS: bool = true;
|
|||||||
const DISALLOW_TYPOS: bool = false;
|
const DISALLOW_TYPOS: bool = false;
|
||||||
const ALLOW_OPTIONAL_WORDS: bool = true;
|
const ALLOW_OPTIONAL_WORDS: bool = true;
|
||||||
const DISALLOW_OPTIONAL_WORDS: bool = false;
|
const DISALLOW_OPTIONAL_WORDS: bool = false;
|
||||||
|
const ASC_DESC_CANDIDATES_THRESHOLD: usize = 1000;
|
||||||
|
|
||||||
macro_rules! test_criterion {
|
macro_rules! test_criterion {
|
||||||
($func:ident, $optional_word:ident, $authorize_typos:ident, $criteria:expr, $sort_criteria:expr) => {
|
($func:ident, $optional_word:ident, $authorize_typos:ident, $criteria:expr, $sort_criteria:expr) => {
|
||||||
@ -357,3 +364,93 @@ fn criteria_mixup() {
|
|||||||
assert_eq!(documents_ids, expected_external_ids);
|
assert_eq!(documents_ids, expected_external_ids);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn criteria_ascdesc() {
|
||||||
|
let path = tempfile::tempdir().unwrap();
|
||||||
|
let mut options = EnvOpenOptions::new();
|
||||||
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||||
|
|
||||||
|
builder.set_sortable_fields(hashset! {
|
||||||
|
S("name"),
|
||||||
|
S("age"),
|
||||||
|
});
|
||||||
|
builder.execute(|_, _| ()).unwrap();
|
||||||
|
|
||||||
|
// index documents
|
||||||
|
let mut builder = UpdateBuilder::new(0);
|
||||||
|
builder.max_memory(10 * 1024 * 1024); // 10MiB
|
||||||
|
let mut builder = builder.index_documents(&mut wtxn, &index);
|
||||||
|
builder.update_format(UpdateFormat::Csv);
|
||||||
|
builder.enable_autogenerate_docids();
|
||||||
|
|
||||||
|
let content = [
|
||||||
|
vec![S("name,age")],
|
||||||
|
(0..ASC_DESC_CANDIDATES_THRESHOLD + 1)
|
||||||
|
.map(|_| {
|
||||||
|
let mut rng = rand::thread_rng();
|
||||||
|
|
||||||
|
let age = rng.gen::<u32>().to_string();
|
||||||
|
let name = rng
|
||||||
|
.sample_iter(&rand::distributions::Alphanumeric)
|
||||||
|
.map(char::from)
|
||||||
|
.filter(|c| *c >= 'a' && *c <= 'z')
|
||||||
|
.take(10)
|
||||||
|
.collect::<String>();
|
||||||
|
|
||||||
|
format!("{},{}", name, age)
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>(),
|
||||||
|
]
|
||||||
|
.iter()
|
||||||
|
.flatten()
|
||||||
|
.join("\n");
|
||||||
|
builder.execute(content.as_bytes(), |_, _| ()).unwrap();
|
||||||
|
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let documents = index.all_documents(&rtxn).unwrap().map(|doc| doc.unwrap()).collect::<Vec<_>>();
|
||||||
|
|
||||||
|
for criterion in [Asc(S("name")), Desc(S("name")), Asc(S("age")), Desc(S("age"))] {
|
||||||
|
eprintln!("Testing with criterion: {:?}", &criterion);
|
||||||
|
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||||
|
builder.set_criteria(vec![criterion.to_string()]);
|
||||||
|
builder.execute(|_, _| ()).unwrap();
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
let mut rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let mut search = Search::new(&mut rtxn, &index);
|
||||||
|
search.limit(ASC_DESC_CANDIDATES_THRESHOLD + 1);
|
||||||
|
|
||||||
|
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||||
|
|
||||||
|
let expected_document_ids = match criterion {
|
||||||
|
Asc(field_name) if field_name == "name" => {
|
||||||
|
documents.iter().sorted_by_key(|(_, obkv)| obkv.get(0).unwrap())
|
||||||
|
}
|
||||||
|
Desc(field_name) if field_name == "name" => {
|
||||||
|
documents.iter().sorted_by_key(|(_, obkv)| Reverse(obkv.get(0).unwrap()))
|
||||||
|
}
|
||||||
|
Asc(field_name) if field_name == "name" => {
|
||||||
|
documents.iter().sorted_by_key(|(_, obkv)| obkv.get(1).unwrap())
|
||||||
|
}
|
||||||
|
Desc(field_name) if field_name == "name" => {
|
||||||
|
documents.iter().sorted_by_key(|(_, obkv)| Reverse(obkv.get(1).unwrap()))
|
||||||
|
}
|
||||||
|
_ => continue,
|
||||||
|
}
|
||||||
|
.map(|(id, _)| *id)
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
assert_eq!(documents_ids, expected_document_ids);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user