mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
make the attribute ranking rule use the weights and fix the tests
This commit is contained in:
parent
a0082c4df9
commit
caa6a7149a
@ -85,6 +85,9 @@ impl SearchQueue {
|
|||||||
},
|
},
|
||||||
|
|
||||||
search_request = receive_new_searches.recv() => {
|
search_request = receive_new_searches.recv() => {
|
||||||
|
if search_request.is_none() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
// this unwrap is safe because we're sure the `SearchQueue` still lives somewhere in actix-web
|
// this unwrap is safe because we're sure the `SearchQueue` still lives somewhere in actix-web
|
||||||
let search_request = search_request.unwrap();
|
let search_request = search_request.unwrap();
|
||||||
if searches_running < usize::from(parallelism) && queue.is_empty() {
|
if searches_running < usize::from(parallelism) && queue.is_empty() {
|
||||||
|
@ -2722,8 +2722,8 @@ pub(crate) mod tests {
|
|||||||
// We should find tamo first
|
// We should find tamo first
|
||||||
insta::assert_debug_snapshot!(results.documents_ids, @r###"
|
insta::assert_debug_snapshot!(results.documents_ids, @r###"
|
||||||
[
|
[
|
||||||
0,
|
|
||||||
1,
|
1,
|
||||||
|
0,
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
@ -76,7 +76,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
|
|
||||||
let mut exact = Vec::new();
|
let mut exact = Vec::new();
|
||||||
let mut tolerant = Vec::new();
|
let mut tolerant = Vec::new();
|
||||||
for (name, fid, weight) in searchable_fids {
|
for (_name, fid, weight) in searchable_fids {
|
||||||
if exact_attributes_ids.contains(&fid) {
|
if exact_attributes_ids.contains(&fid) {
|
||||||
exact.push((fid, weight));
|
exact.push((fid, weight));
|
||||||
} else {
|
} else {
|
||||||
@ -96,22 +96,26 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: TAMO continue here
|
|
||||||
pub fn searchable_attributes(&mut self, attributes_to_search_on: &'ctx [String]) -> Result<()> {
|
pub fn searchable_attributes(&mut self, attributes_to_search_on: &'ctx [String]) -> Result<()> {
|
||||||
if attributes_to_search_on.contains(&String::from("*")) {
|
let user_defined_searchable = self.index.user_defined_searchable_fields(self.txn)?;
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
let fids_map = self.index.fields_ids_map(self.txn)?;
|
|
||||||
let searchable_names = self.index.searchable_fields_and_weights(self.txn)?;
|
let searchable_names = self.index.searchable_fields_and_weights(self.txn)?;
|
||||||
let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;
|
let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;
|
||||||
|
|
||||||
|
let mut wildcard = false;
|
||||||
|
|
||||||
let mut restricted_fids = SearchableFids::default();
|
let mut restricted_fids = SearchableFids::default();
|
||||||
for field_name in attributes_to_search_on {
|
for field_name in attributes_to_search_on {
|
||||||
|
if field_name == "*" {
|
||||||
|
wildcard = true;
|
||||||
|
// we cannot early exit as we want to returns error in case of unknown fields
|
||||||
|
continue;
|
||||||
|
}
|
||||||
let searchable_weight = searchable_names.iter().find(|(name, _, _)| name == field_name);
|
let searchable_weight = searchable_names.iter().find(|(name, _, _)| name == field_name);
|
||||||
let (fid, weight) = match searchable_weight {
|
let (fid, weight) = match searchable_weight {
|
||||||
// The Field id exist and the field is searchable
|
// The Field id exist and the field is searchable
|
||||||
Some((_name, fid, weight)) => (*fid, *weight),
|
Some((_name, fid, weight)) => (*fid, *weight),
|
||||||
|
// The field is not searchable but the user didn't define any searchable attributes
|
||||||
|
None if user_defined_searchable.is_none() => continue,
|
||||||
// The field is not searchable => User error
|
// The field is not searchable => User error
|
||||||
None => {
|
None => {
|
||||||
let (valid_fields, hidden_fields) = self.index.remove_hidden_fields(
|
let (valid_fields, hidden_fields) = self.index.remove_hidden_fields(
|
||||||
@ -136,7 +140,16 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if wildcard {
|
||||||
|
let (exact, tolerant) = searchable_names
|
||||||
|
.iter()
|
||||||
|
.map(|(_name, fid, weight)| (*fid, *weight))
|
||||||
|
.partition(|(fid, _weight)| exact_attributes_ids.contains(fid));
|
||||||
|
|
||||||
|
self.searchable_fids = SearchableFids { tolerant, exact };
|
||||||
|
} else {
|
||||||
self.searchable_fids = restricted_fids;
|
self.searchable_fids = restricted_fids;
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -7,12 +7,12 @@ use crate::search::new::interner::{DedupInterner, Interned};
|
|||||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
|
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
|
||||||
use crate::search::new::SearchContext;
|
use crate::search::new::SearchContext;
|
||||||
use crate::Result;
|
use crate::{FieldId, Result};
|
||||||
|
|
||||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||||
pub struct FidCondition {
|
pub struct FidCondition {
|
||||||
term: LocatedQueryTermSubset,
|
term: LocatedQueryTermSubset,
|
||||||
fid: u16,
|
fid: Option<FieldId>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum FidGraph {}
|
pub enum FidGraph {}
|
||||||
@ -26,13 +26,16 @@ impl RankingRuleGraphTrait for FidGraph {
|
|||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<ComputedCondition> {
|
) -> Result<ComputedCondition> {
|
||||||
let FidCondition { term, .. } = condition;
|
let FidCondition { term, .. } = condition;
|
||||||
|
|
||||||
|
let docids = if let Some(fid) = condition.fid {
|
||||||
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
|
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
|
||||||
let mut docids = compute_query_term_subset_docids_within_field_id(
|
let mut docids =
|
||||||
ctx,
|
compute_query_term_subset_docids_within_field_id(ctx, &term.term_subset, fid)?;
|
||||||
&term.term_subset,
|
|
||||||
condition.fid,
|
|
||||||
)?;
|
|
||||||
docids &= universe;
|
docids &= universe;
|
||||||
|
docids
|
||||||
|
} else {
|
||||||
|
RoaringBitmap::new()
|
||||||
|
};
|
||||||
|
|
||||||
Ok(ComputedCondition {
|
Ok(ComputedCondition {
|
||||||
docids,
|
docids,
|
||||||
@ -68,24 +71,27 @@ impl RankingRuleGraphTrait for FidGraph {
|
|||||||
all_fields.extend(fields);
|
all_fields.extend(fields);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let weights_map = ctx.index.fieldids_weights_map(ctx.txn)?;
|
||||||
|
|
||||||
let mut edges = vec![];
|
let mut edges = vec![];
|
||||||
for fid in all_fields.iter().copied() {
|
for fid in all_fields.iter().copied() {
|
||||||
|
let weight = weights_map.weight(fid).unwrap();
|
||||||
edges.push((
|
edges.push((
|
||||||
fid as u32 * term.term_ids.len() as u32,
|
weight as u32 * term.term_ids.len() as u32,
|
||||||
conditions_interner.insert(FidCondition { term: term.clone(), fid }),
|
conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
// always lookup the max_fid if we don't already and add an artificial condition for max scoring
|
// always lookup the max_fid if we don't already and add an artificial condition for max scoring
|
||||||
let max_fid: Option<u16> = ctx.index.searchable_fields_ids(ctx.txn)?.into_iter().max();
|
let max_weight: Option<u16> = weights_map.max_weight();
|
||||||
|
|
||||||
if let Some(max_fid) = max_fid {
|
if let Some(max_weight) = max_weight {
|
||||||
if !all_fields.contains(&max_fid) {
|
if !all_fields.contains(&max_weight) {
|
||||||
edges.push((
|
edges.push((
|
||||||
max_fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
|
max_weight as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
|
||||||
conditions_interner.insert(FidCondition {
|
conditions_interner.insert(FidCondition {
|
||||||
term: term.clone(), // TODO remove this ugly clone
|
term: term.clone(), // TODO remove this ugly clone
|
||||||
fid: max_fid,
|
fid: None,
|
||||||
}),
|
}),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
@ -132,17 +132,17 @@ fn test_attribute_fid_simple() {
|
|||||||
fn test_attribute_fid_ngrams() {
|
fn test_attribute_fid_ngrams() {
|
||||||
let index = create_index();
|
let index = create_index();
|
||||||
db_snap!(index, fields_ids_map, @r###"
|
db_snap!(index, fields_ids_map, @r###"
|
||||||
0 title |
|
0 id |
|
||||||
1 description |
|
1 title |
|
||||||
2 plot |
|
2 description |
|
||||||
3 id |
|
3 plot |
|
||||||
"###);
|
"###);
|
||||||
db_snap!(index, searchable_fields, @r###"["title", "description", "plot"]"###);
|
db_snap!(index, searchable_fields, @r###"["title", "description", "plot"]"###);
|
||||||
db_snap!(index, fieldids_weights_map, @r###"
|
db_snap!(index, fieldids_weights_map, @r###"
|
||||||
fid weight
|
fid weight
|
||||||
0 0 |
|
1 0 |
|
||||||
1 1 |
|
2 1 |
|
||||||
2 2 |
|
3 2 |
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
let txn = index.read_txn().unwrap();
|
let txn = index.read_txn().unwrap();
|
||||||
|
@ -0,0 +1,244 @@
|
|||||||
|
---
|
||||||
|
source: milli/src/search/new/tests/attribute_fid.rs
|
||||||
|
expression: "format!(\"{document_ids_scores:#?}\")"
|
||||||
|
---
|
||||||
|
[
|
||||||
|
(
|
||||||
|
2,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 19,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 91,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
6,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 15,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 81,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
5,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 14,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 79,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
4,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 13,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 77,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
3,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 12,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 83,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
9,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 11,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 75,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
8,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 10,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 79,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
7,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 10,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 73,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
11,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 7,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 77,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
10,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 6,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 81,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
13,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 6,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 81,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
12,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 6,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 78,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
14,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 5,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 75,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
0,
|
||||||
|
[
|
||||||
|
Fid(
|
||||||
|
Rank {
|
||||||
|
rank: 1,
|
||||||
|
max_rank: 19,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Position(
|
||||||
|
Rank {
|
||||||
|
rank: 91,
|
||||||
|
max_rank: 91,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
]
|
@ -12,7 +12,6 @@ use time::OffsetDateTime;
|
|||||||
use super::index_documents::{IndexDocumentsConfig, Transform};
|
use super::index_documents::{IndexDocumentsConfig, Transform};
|
||||||
use super::IndexerConfig;
|
use super::IndexerConfig;
|
||||||
use crate::criterion::Criterion;
|
use crate::criterion::Criterion;
|
||||||
use crate::documents::FieldIdMapper;
|
|
||||||
use crate::error::UserError;
|
use crate::error::UserError;
|
||||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||||
use crate::order_by_map::OrderByMap;
|
use crate::order_by_map::OrderByMap;
|
||||||
@ -1562,8 +1561,9 @@ mod tests {
|
|||||||
// we must find the appropriate document.
|
// we must find the appropriate document.
|
||||||
let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap();
|
let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap();
|
||||||
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
|
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
|
||||||
|
let fid_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
assert_eq!(documents.len(), 1);
|
assert_eq!(documents.len(), 1);
|
||||||
assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..]));
|
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
|
||||||
drop(rtxn);
|
drop(rtxn);
|
||||||
|
|
||||||
// We change the searchable fields to be the "name" field only.
|
// We change the searchable fields to be the "name" field only.
|
||||||
@ -1575,12 +1575,16 @@ mod tests {
|
|||||||
|
|
||||||
// Check that the searchable field have been reset and documents are found now.
|
// Check that the searchable field have been reset and documents are found now.
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let fid_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
|
let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap();
|
||||||
|
snapshot!(format!("{user_defined_searchable_fields:?}"), @"None");
|
||||||
|
// the searchable fields should contain all the fields
|
||||||
let searchable_fields = index.searchable_fields(&rtxn).unwrap();
|
let searchable_fields = index.searchable_fields(&rtxn).unwrap();
|
||||||
snapshot!(format!("{searchable_fields:?}"), @r###"["name", "id", "age"]"###);
|
snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###);
|
||||||
let result = index.search(&rtxn).query("23").execute().unwrap();
|
let result = index.search(&rtxn).query("23").execute().unwrap();
|
||||||
assert_eq!(result.documents_ids.len(), 1);
|
assert_eq!(result.documents_ids.len(), 1);
|
||||||
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
|
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
|
||||||
assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..]));
|
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
Loading…
Reference in New Issue
Block a user