mirror of https://github.com/meilisearch/MeiliSearch (synced 2024-11-23 13:24:27 +01:00)
Reintroduce exactness for one-word document field
This commit is contained in:
parent 746e6e170c
commit 7d67750865
@@ -1,4 +1,6 @@
 use std::cmp::{Ordering, Reverse};
+use std::collections::hash_map::{HashMap, Entry};
+use meilisearch_schema::SchemaAttr;
 use slice_group_by::GroupBy;
 use crate::{RawDocument, MResult};
 use crate::bucket_sort::BareMatch;
@@ -11,13 +13,44 @@ impl Criterion for Exact {
     fn prepare<'h, 'p, 'tag, 'txn, 'q, 'a, 'r>(
         &self,
-        _ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q, 'a>,
+        ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q, 'a>,
         documents: &mut [RawDocument<'r, 'tag>],
     ) -> MResult<()>
     {
-        for document in documents {
-            document.raw_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
+        let store = ctx.documents_fields_counts_store;
+        let reader = ctx.reader;
+
+        'documents: for doc in documents {
+            doc.raw_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
+
+            // mark the document if we find a "one word field" that matches
+            let mut fields_counts = HashMap::new();
+            for group in doc.raw_matches.linear_group_by_key(|bm| bm.query_index) {
+                for group in group.linear_group_by_key(|bm| bm.is_exact) {
+                    if !group[0].is_exact { break }
+
+                    for bm in group {
+                        for di in ctx.postings_lists[bm.postings_list].as_ref() {
+
+                            let attr = SchemaAttr(di.attribute);
+                            let count = match fields_counts.entry(attr) {
+                                Entry::Occupied(entry) => *entry.get(),
+                                Entry::Vacant(entry) => {
+                                    let count = store.document_field_count(reader, doc.id, attr)?;
+                                    *entry.insert(count)
+                                },
+                            };
+
+                            if count == Some(1) {
+                                doc.contains_one_word_field = true;
+                                continue 'documents
+                            }
+                        }
+                    }
+                }
+            }
         }

         Ok(())
     }

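The new loop caches the per-attribute field counts in a HashMap so the store is queried at most once per (document, attribute) pair. A minimal standalone sketch of that memoization pattern, not the MeiliSearch API: cached_field_count and the fetch closure are hypothetical stand-ins for store.document_field_count(reader, doc.id, attr).

    use std::collections::hash_map::{Entry, HashMap};

    // Hypothetical stand-in for the store lookup: the cache guarantees each
    // attribute id is fetched at most once per document.
    fn cached_field_count(
        cache: &mut HashMap<u16, Option<u64>>,
        attr: u16,
        fetch: impl Fn(u16) -> Option<u64>,
    ) -> Option<u64> {
        match cache.entry(attr) {
            Entry::Occupied(entry) => *entry.get(),
            Entry::Vacant(entry) => *entry.insert(fetch(attr)),
        }
    }

    fn main() {
        let mut cache = HashMap::new();
        // Pretend attribute 0 holds exactly one indexed word and attribute 1 holds three.
        let fetch = |attr| if attr == 0 { Some(1) } else { Some(3) };

        assert_eq!(cached_field_count(&mut cache, 0, fetch), Some(1)); // looked up
        assert_eq!(cached_field_count(&mut cache, 0, fetch), Some(1)); // served from the cache
        assert_eq!(cached_field_count(&mut cache, 1, fetch), Some(3));
    }
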
@@ -33,8 +66,13 @@ impl Criterion for Exact {
             sum_exact_query_words
         }

-        let lhs = sum_exact_query_words(&lhs.raw_matches);
-        let rhs = sum_exact_query_words(&rhs.raw_matches);
-        lhs.cmp(&rhs).reverse()
+        // does it contain a "one word field"?
+        lhs.contains_one_word_field.cmp(&rhs.contains_one_word_field).reverse()
+        // if not, prefer the document that contains the more exact words
+        .then_with(|| {
+            let lhs = sum_exact_query_words(&lhs.raw_matches);
+            let rhs = sum_exact_query_words(&rhs.raw_matches);
+            lhs.cmp(&rhs).reverse()
+        })
     }
 }

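The comparison now ranks documents whose match fills an entire one-word field ahead of everything else, and only falls back to the exact-word count on ties. A simplified sketch of that two-stage ordering, using a hypothetical Doc struct and exactness_order function in place of RawDocument and the criterion's evaluate method:

    use std::cmp::Ordering;

    // Simplified, hypothetical stand-in for RawDocument: only the two pieces of
    // state the exactness comparison needs.
    struct Doc {
        contains_one_word_field: bool,
        exact_words: usize, // stands in for sum_exact_query_words(&raw_matches)
    }

    // One-word-field matches sort first; ties fall back to the number of exactly
    // matched query words. Both comparisons are reversed so "more exact" comes
    // earlier in the results.
    fn exactness_order(lhs: &Doc, rhs: &Doc) -> Ordering {
        lhs.contains_one_word_field
            .cmp(&rhs.contains_one_word_field)
            .reverse()
            .then_with(|| lhs.exact_words.cmp(&rhs.exact_words).reverse())
    }

    fn main() {
        let a = Doc { contains_one_word_field: true, exact_words: 1 };
        let b = Doc { contains_one_word_field: false, exact_words: 5 };
        // `a` wins despite fewer exact words, because its match fills a whole field.
        assert_eq!(exactness_order(&a, &b), Ordering::Less);
    }
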
@@ -284,11 +284,7 @@ mod tests {

         writer.commit().unwrap();

-        TempDatabase {
-            database,
-            index,
-            _tempdir: tempdir,
-        }
+        TempDatabase { database, index, _tempdir: tempdir }
     }
 }

@@ -1162,6 +1158,46 @@ mod tests {
         assert_matches!(iter.next(), None);
     }

+    #[test]
+    fn exact_field_count_one_word() {
+        let store = TempDatabase::from_iter(vec![
+            ("searchengine", &[doc_index(0, 0)][..]),
+            ("searchengine", &[doc_index(1, 0)][..]),
+            ("blue", &[doc_index(1, 1)][..]),
+            ("searchangine", &[doc_index(2, 0)][..]),
+            ("searchengine", &[doc_index(3, 0)][..]),
+        ]);
+
+        let db = &store.database;
+        let reader = db.main_read_txn().unwrap();
+
+        let builder = store.query_builder();
+        let results = builder.query(&reader, "searchengine", 0..20).unwrap();
+        let mut iter = results.into_iter();
+
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
+            let mut iter = matches.into_iter();
+            assert_matches!(iter.next(), Some(SimpleMatch { query_index: 0, word_index: 0, distance: 0, .. })); // searchengine
+            assert_matches!(iter.next(), None);
+        });
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(3), matches, .. }) => {
+            let mut iter = matches.into_iter();
+            assert_matches!(iter.next(), Some(SimpleMatch { query_index: 0, word_index: 0, distance: 0, .. })); // searchengine
+            assert_matches!(iter.next(), None);
+        });
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
+            let mut iter = matches.into_iter();
+            assert_matches!(iter.next(), Some(SimpleMatch { query_index: 0, word_index: 0, distance: 0, .. })); // searchengine
+            assert_matches!(iter.next(), None);
+        });
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, .. }) => {
+            let mut iter = matches.into_iter();
+            assert_matches!(iter.next(), Some(SimpleMatch { query_index: 0, word_index: 0, distance: 1, .. })); // searchengine
+            assert_matches!(iter.next(), None);
+        });
+        assert_matches!(iter.next(), None);
+    }
+
     #[test]
     fn simple_phrase_query_splitting() {
         let store = TempDatabase::from_iter(vec![