mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 06:44:27 +01:00
make the attribute ranking rule use the weights and fix the tests
This commit is contained in:
parent
a0082c4df9
commit
caa6a7149a
@ -85,6 +85,9 @@ impl SearchQueue {
|
||||
},
|
||||
|
||||
search_request = receive_new_searches.recv() => {
|
||||
if search_request.is_none() {
|
||||
continue;
|
||||
}
|
||||
// this unwrap is safe because we're sure the `SearchQueue` still lives somewhere in actix-web
|
||||
let search_request = search_request.unwrap();
|
||||
if searches_running < usize::from(parallelism) && queue.is_empty() {
|
||||
|
@ -2722,8 +2722,8 @@ pub(crate) mod tests {
|
||||
// We should find tamo first
|
||||
insta::assert_debug_snapshot!(results.documents_ids, @r###"
|
||||
[
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
@ -76,7 +76,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
|
||||
let mut exact = Vec::new();
|
||||
let mut tolerant = Vec::new();
|
||||
for (name, fid, weight) in searchable_fids {
|
||||
for (_name, fid, weight) in searchable_fids {
|
||||
if exact_attributes_ids.contains(&fid) {
|
||||
exact.push((fid, weight));
|
||||
} else {
|
||||
@ -96,22 +96,26 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
})
|
||||
}
|
||||
|
||||
// TODO: TAMO continue here
|
||||
pub fn searchable_attributes(&mut self, attributes_to_search_on: &'ctx [String]) -> Result<()> {
|
||||
if attributes_to_search_on.contains(&String::from("*")) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let fids_map = self.index.fields_ids_map(self.txn)?;
|
||||
let user_defined_searchable = self.index.user_defined_searchable_fields(self.txn)?;
|
||||
let searchable_names = self.index.searchable_fields_and_weights(self.txn)?;
|
||||
let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;
|
||||
|
||||
let mut wildcard = false;
|
||||
|
||||
let mut restricted_fids = SearchableFids::default();
|
||||
for field_name in attributes_to_search_on {
|
||||
if field_name == "*" {
|
||||
wildcard = true;
|
||||
// we cannot early exit as we want to returns error in case of unknown fields
|
||||
continue;
|
||||
}
|
||||
let searchable_weight = searchable_names.iter().find(|(name, _, _)| name == field_name);
|
||||
let (fid, weight) = match searchable_weight {
|
||||
// The Field id exist and the field is searchable
|
||||
Some((_name, fid, weight)) => (*fid, *weight),
|
||||
// The field is not searchable but the user didn't define any searchable attributes
|
||||
None if user_defined_searchable.is_none() => continue,
|
||||
// The field is not searchable => User error
|
||||
None => {
|
||||
let (valid_fields, hidden_fields) = self.index.remove_hidden_fields(
|
||||
@ -136,7 +140,16 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
};
|
||||
}
|
||||
|
||||
self.searchable_fids = restricted_fids;
|
||||
if wildcard {
|
||||
let (exact, tolerant) = searchable_names
|
||||
.iter()
|
||||
.map(|(_name, fid, weight)| (*fid, *weight))
|
||||
.partition(|(fid, _weight)| exact_attributes_ids.contains(fid));
|
||||
|
||||
self.searchable_fids = SearchableFids { tolerant, exact };
|
||||
} else {
|
||||
self.searchable_fids = restricted_fids;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -7,12 +7,12 @@ use crate::search::new::interner::{DedupInterner, Interned};
|
||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
|
||||
use crate::search::new::SearchContext;
|
||||
use crate::Result;
|
||||
use crate::{FieldId, Result};
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub struct FidCondition {
|
||||
term: LocatedQueryTermSubset,
|
||||
fid: u16,
|
||||
fid: Option<FieldId>,
|
||||
}
|
||||
|
||||
pub enum FidGraph {}
|
||||
@ -26,13 +26,16 @@ impl RankingRuleGraphTrait for FidGraph {
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<ComputedCondition> {
|
||||
let FidCondition { term, .. } = condition;
|
||||
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
|
||||
let mut docids = compute_query_term_subset_docids_within_field_id(
|
||||
ctx,
|
||||
&term.term_subset,
|
||||
condition.fid,
|
||||
)?;
|
||||
docids &= universe;
|
||||
|
||||
let docids = if let Some(fid) = condition.fid {
|
||||
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
|
||||
let mut docids =
|
||||
compute_query_term_subset_docids_within_field_id(ctx, &term.term_subset, fid)?;
|
||||
docids &= universe;
|
||||
docids
|
||||
} else {
|
||||
RoaringBitmap::new()
|
||||
};
|
||||
|
||||
Ok(ComputedCondition {
|
||||
docids,
|
||||
@ -68,24 +71,27 @@ impl RankingRuleGraphTrait for FidGraph {
|
||||
all_fields.extend(fields);
|
||||
}
|
||||
|
||||
let weights_map = ctx.index.fieldids_weights_map(ctx.txn)?;
|
||||
|
||||
let mut edges = vec![];
|
||||
for fid in all_fields.iter().copied() {
|
||||
let weight = weights_map.weight(fid).unwrap();
|
||||
edges.push((
|
||||
fid as u32 * term.term_ids.len() as u32,
|
||||
conditions_interner.insert(FidCondition { term: term.clone(), fid }),
|
||||
weight as u32 * term.term_ids.len() as u32,
|
||||
conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }),
|
||||
));
|
||||
}
|
||||
|
||||
// always lookup the max_fid if we don't already and add an artificial condition for max scoring
|
||||
let max_fid: Option<u16> = ctx.index.searchable_fields_ids(ctx.txn)?.into_iter().max();
|
||||
let max_weight: Option<u16> = weights_map.max_weight();
|
||||
|
||||
if let Some(max_fid) = max_fid {
|
||||
if !all_fields.contains(&max_fid) {
|
||||
if let Some(max_weight) = max_weight {
|
||||
if !all_fields.contains(&max_weight) {
|
||||
edges.push((
|
||||
max_fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
|
||||
max_weight as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
|
||||
conditions_interner.insert(FidCondition {
|
||||
term: term.clone(), // TODO remove this ugly clone
|
||||
fid: max_fid,
|
||||
fid: None,
|
||||
}),
|
||||
));
|
||||
}
|
||||
|
@ -132,17 +132,17 @@ fn test_attribute_fid_simple() {
|
||||
fn test_attribute_fid_ngrams() {
|
||||
let index = create_index();
|
||||
db_snap!(index, fields_ids_map, @r###"
|
||||
0 title |
|
||||
1 description |
|
||||
2 plot |
|
||||
3 id |
|
||||
0 id |
|
||||
1 title |
|
||||
2 description |
|
||||
3 plot |
|
||||
"###);
|
||||
db_snap!(index, searchable_fields, @r###"["title", "description", "plot"]"###);
|
||||
db_snap!(index, fieldids_weights_map, @r###"
|
||||
fid weight
|
||||
0 0 |
|
||||
1 1 |
|
||||
2 2 |
|
||||
1 0 |
|
||||
2 1 |
|
||||
3 2 |
|
||||
"###);
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
@ -0,0 +1,244 @@
|
||||
---
|
||||
source: milli/src/search/new/tests/attribute_fid.rs
|
||||
expression: "format!(\"{document_ids_scores:#?}\")"
|
||||
---
|
||||
[
|
||||
(
|
||||
2,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 19,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 91,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
6,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 15,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 81,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
5,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 14,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 79,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
4,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 13,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 77,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
3,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 12,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 83,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
9,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 11,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 75,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
8,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 79,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
7,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 73,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
11,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 7,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 77,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
10,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 81,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
13,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 81,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
12,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 78,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
14,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 75,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
0,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 91,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
]
|
@ -12,7 +12,6 @@ use time::OffsetDateTime;
|
||||
use super::index_documents::{IndexDocumentsConfig, Transform};
|
||||
use super::IndexerConfig;
|
||||
use crate::criterion::Criterion;
|
||||
use crate::documents::FieldIdMapper;
|
||||
use crate::error::UserError;
|
||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||
use crate::order_by_map::OrderByMap;
|
||||
@ -1562,8 +1561,9 @@ mod tests {
|
||||
// we must find the appropriate document.
|
||||
let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap();
|
||||
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
|
||||
let fid_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
assert_eq!(documents.len(), 1);
|
||||
assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..]));
|
||||
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
|
||||
drop(rtxn);
|
||||
|
||||
// We change the searchable fields to be the "name" field only.
|
||||
@ -1575,12 +1575,16 @@ mod tests {
|
||||
|
||||
// Check that the searchable field have been reset and documents are found now.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let fid_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap();
|
||||
snapshot!(format!("{user_defined_searchable_fields:?}"), @"None");
|
||||
// the searchable fields should contain all the fields
|
||||
let searchable_fields = index.searchable_fields(&rtxn).unwrap();
|
||||
snapshot!(format!("{searchable_fields:?}"), @r###"["name", "id", "age"]"###);
|
||||
snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###);
|
||||
let result = index.search(&rtxn).query("23").execute().unwrap();
|
||||
assert_eq!(result.documents_ids.len(), 1);
|
||||
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
|
||||
assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..]));
|
||||
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
Loading…
Reference in New Issue
Block a user