make the attribute ranking rule use the weights and fix the tests

This commit is contained in:
Tamo 2024-05-14 16:56:08 +02:00
parent a0082c4df9
commit caa6a7149a
7 changed files with 306 additions and 36 deletions

View File

@ -85,6 +85,9 @@ impl SearchQueue {
},
search_request = receive_new_searches.recv() => {
if search_request.is_none() {
continue;
}
// this unwrap is safe because we're sure the `SearchQueue` still lives somewhere in actix-web
let search_request = search_request.unwrap();
if searches_running < usize::from(parallelism) && queue.is_empty() {

View File

@ -2722,8 +2722,8 @@ pub(crate) mod tests {
// We should find tamo first
insta::assert_debug_snapshot!(results.documents_ids, @r###"
[
0,
1,
0,
]
"###);
}

View File

@ -76,7 +76,7 @@ impl<'ctx> SearchContext<'ctx> {
let mut exact = Vec::new();
let mut tolerant = Vec::new();
for (name, fid, weight) in searchable_fids {
for (_name, fid, weight) in searchable_fids {
if exact_attributes_ids.contains(&fid) {
exact.push((fid, weight));
} else {
@ -96,22 +96,26 @@ impl<'ctx> SearchContext<'ctx> {
})
}
// TODO: TAMO continue here
pub fn searchable_attributes(&mut self, attributes_to_search_on: &'ctx [String]) -> Result<()> {
if attributes_to_search_on.contains(&String::from("*")) {
return Ok(());
}
let fids_map = self.index.fields_ids_map(self.txn)?;
let user_defined_searchable = self.index.user_defined_searchable_fields(self.txn)?;
let searchable_names = self.index.searchable_fields_and_weights(self.txn)?;
let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;
let mut wildcard = false;
let mut restricted_fids = SearchableFids::default();
for field_name in attributes_to_search_on {
if field_name == "*" {
wildcard = true;
// we cannot early exit as we want to returns error in case of unknown fields
continue;
}
let searchable_weight = searchable_names.iter().find(|(name, _, _)| name == field_name);
let (fid, weight) = match searchable_weight {
// The Field id exist and the field is searchable
Some((_name, fid, weight)) => (*fid, *weight),
// The field is not searchable but the user didn't define any searchable attributes
None if user_defined_searchable.is_none() => continue,
// The field is not searchable => User error
None => {
let (valid_fields, hidden_fields) = self.index.remove_hidden_fields(
@ -136,7 +140,16 @@ impl<'ctx> SearchContext<'ctx> {
};
}
self.searchable_fids = restricted_fids;
if wildcard {
let (exact, tolerant) = searchable_names
.iter()
.map(|(_name, fid, weight)| (*fid, *weight))
.partition(|(fid, _weight)| exact_attributes_ids.contains(fid));
self.searchable_fids = SearchableFids { tolerant, exact };
} else {
self.searchable_fids = restricted_fids;
}
Ok(())
}

View File

@ -7,12 +7,12 @@ use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
use crate::search::new::SearchContext;
use crate::Result;
use crate::{FieldId, Result};
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct FidCondition {
term: LocatedQueryTermSubset,
fid: u16,
fid: Option<FieldId>,
}
pub enum FidGraph {}
@ -26,13 +26,16 @@ impl RankingRuleGraphTrait for FidGraph {
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
let FidCondition { term, .. } = condition;
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
let mut docids = compute_query_term_subset_docids_within_field_id(
ctx,
&term.term_subset,
condition.fid,
)?;
docids &= universe;
let docids = if let Some(fid) = condition.fid {
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
let mut docids =
compute_query_term_subset_docids_within_field_id(ctx, &term.term_subset, fid)?;
docids &= universe;
docids
} else {
RoaringBitmap::new()
};
Ok(ComputedCondition {
docids,
@ -68,24 +71,27 @@ impl RankingRuleGraphTrait for FidGraph {
all_fields.extend(fields);
}
let weights_map = ctx.index.fieldids_weights_map(ctx.txn)?;
let mut edges = vec![];
for fid in all_fields.iter().copied() {
let weight = weights_map.weight(fid).unwrap();
edges.push((
fid as u32 * term.term_ids.len() as u32,
conditions_interner.insert(FidCondition { term: term.clone(), fid }),
weight as u32 * term.term_ids.len() as u32,
conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }),
));
}
// always lookup the max_fid if we don't already and add an artificial condition for max scoring
let max_fid: Option<u16> = ctx.index.searchable_fields_ids(ctx.txn)?.into_iter().max();
let max_weight: Option<u16> = weights_map.max_weight();
if let Some(max_fid) = max_fid {
if !all_fields.contains(&max_fid) {
if let Some(max_weight) = max_weight {
if !all_fields.contains(&max_weight) {
edges.push((
max_fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
max_weight as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
conditions_interner.insert(FidCondition {
term: term.clone(), // TODO remove this ugly clone
fid: max_fid,
fid: None,
}),
));
}

View File

@ -132,17 +132,17 @@ fn test_attribute_fid_simple() {
fn test_attribute_fid_ngrams() {
let index = create_index();
db_snap!(index, fields_ids_map, @r###"
0 title |
1 description |
2 plot |
3 id |
0 id |
1 title |
2 description |
3 plot |
"###);
db_snap!(index, searchable_fields, @r###"["title", "description", "plot"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
1 1 |
2 2 |
1 0 |
2 1 |
3 2 |
"###);
let txn = index.read_txn().unwrap();

View File

@ -0,0 +1,244 @@
---
source: milli/src/search/new/tests/attribute_fid.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
2,
[
Fid(
Rank {
rank: 19,
max_rank: 19,
},
),
Position(
Rank {
rank: 91,
max_rank: 91,
},
),
],
),
(
6,
[
Fid(
Rank {
rank: 15,
max_rank: 19,
},
),
Position(
Rank {
rank: 81,
max_rank: 91,
},
),
],
),
(
5,
[
Fid(
Rank {
rank: 14,
max_rank: 19,
},
),
Position(
Rank {
rank: 79,
max_rank: 91,
},
),
],
),
(
4,
[
Fid(
Rank {
rank: 13,
max_rank: 19,
},
),
Position(
Rank {
rank: 77,
max_rank: 91,
},
),
],
),
(
3,
[
Fid(
Rank {
rank: 12,
max_rank: 19,
},
),
Position(
Rank {
rank: 83,
max_rank: 91,
},
),
],
),
(
9,
[
Fid(
Rank {
rank: 11,
max_rank: 19,
},
),
Position(
Rank {
rank: 75,
max_rank: 91,
},
),
],
),
(
8,
[
Fid(
Rank {
rank: 10,
max_rank: 19,
},
),
Position(
Rank {
rank: 79,
max_rank: 91,
},
),
],
),
(
7,
[
Fid(
Rank {
rank: 10,
max_rank: 19,
},
),
Position(
Rank {
rank: 73,
max_rank: 91,
},
),
],
),
(
11,
[
Fid(
Rank {
rank: 7,
max_rank: 19,
},
),
Position(
Rank {
rank: 77,
max_rank: 91,
},
),
],
),
(
10,
[
Fid(
Rank {
rank: 6,
max_rank: 19,
},
),
Position(
Rank {
rank: 81,
max_rank: 91,
},
),
],
),
(
13,
[
Fid(
Rank {
rank: 6,
max_rank: 19,
},
),
Position(
Rank {
rank: 81,
max_rank: 91,
},
),
],
),
(
12,
[
Fid(
Rank {
rank: 6,
max_rank: 19,
},
),
Position(
Rank {
rank: 78,
max_rank: 91,
},
),
],
),
(
14,
[
Fid(
Rank {
rank: 5,
max_rank: 19,
},
),
Position(
Rank {
rank: 75,
max_rank: 91,
},
),
],
),
(
0,
[
Fid(
Rank {
rank: 1,
max_rank: 19,
},
),
Position(
Rank {
rank: 91,
max_rank: 91,
},
),
],
),
]

View File

@ -12,7 +12,6 @@ use time::OffsetDateTime;
use super::index_documents::{IndexDocumentsConfig, Transform};
use super::IndexerConfig;
use crate::criterion::Criterion;
use crate::documents::FieldIdMapper;
use crate::error::UserError;
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
use crate::order_by_map::OrderByMap;
@ -1562,8 +1561,9 @@ mod tests {
// we must find the appropriate document.
let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap();
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
let fid_map = index.fields_ids_map(&rtxn).unwrap();
assert_eq!(documents.len(), 1);
assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..]));
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
drop(rtxn);
// We change the searchable fields to be the "name" field only.
@ -1575,12 +1575,16 @@ mod tests {
// Check that the searchable field have been reset and documents are found now.
let rtxn = index.read_txn().unwrap();
let fid_map = index.fields_ids_map(&rtxn).unwrap();
let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap();
snapshot!(format!("{user_defined_searchable_fields:?}"), @"None");
// the searchable fields should contain all the fields
let searchable_fields = index.searchable_fields(&rtxn).unwrap();
snapshot!(format!("{searchable_fields:?}"), @r###"["name", "id", "age"]"###);
snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###);
let result = index.search(&rtxn).query("23").execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..]));
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
}
#[test]