202: Add field id word count docids database r=Kerollmops a=LegendreM

This PR introduces a new database, `field_id_word_count_docids`, that maps the number of words in an attribute with a list of document ids. This relation is limited to attributes that contain less than 11 words.
This database is used by the exactness criterion to know if a document has an attribute that contains exactly the query without any additional word.

Fix #165 
Fix #196
Related to [specifications:#36](https://github.com/meilisearch/specifications/pull/36)

Co-authored-by: many <maxime@meilisearch.com>
Co-authored-by: Many <legendre.maxime.isn@gmail.com>
This commit is contained in:
bors[bot] 2021-06-01 16:09:48 +00:00 committed by GitHub
commit 270da98c46
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 193 additions and 22 deletions

View file

@ -1,9 +1,10 @@
use std::convert::TryFrom;
use std::mem::take;
use std::ops::BitOr;
use log::debug;
use roaring::RoaringBitmap;
use itertools::Itertools;
use std::ops::BitOr;
use crate::search::query_tree::{Operation, PrimitiveQueryPart};
use crate::search::criteria::{
@ -162,23 +163,24 @@ fn resolve_state(
use State::*;
match state {
ExactAttribute(mut allowed_candidates) => {
let query_len = query.len() as u32;
let mut candidates = RoaringBitmap::new();
let attributes_ids = ctx.searchable_fields_ids()?;
for id in attributes_ids {
if let Some(attribute_allowed_docids) = ctx.field_id_len_docids(id, query_len)? {
let mut attribute_candidates_array = attribute_start_with_docids(ctx, id as u32, query)?;
attribute_candidates_array.push(attribute_allowed_docids);
candidates |= intersection_of(attribute_candidates_array.iter().collect());
if let Ok(query_len) = u8::try_from(query.len()) {
let attributes_ids = ctx.searchable_fields_ids()?;
for id in attributes_ids {
if let Some(attribute_allowed_docids) = ctx.field_id_word_count_docids(id, query_len)? {
let mut attribute_candidates_array = attribute_start_with_docids(ctx, id as u32, query)?;
attribute_candidates_array.push(attribute_allowed_docids);
candidates |= intersection_of(attribute_candidates_array.iter().collect());
}
}
// only keep allowed candidates
candidates &= &allowed_candidates;
// remove current candidates from allowed candidates
allowed_candidates -= &candidates;
}
// only keep allowed candidates
candidates &= &allowed_candidates;
// remove current candidates from allowed candidates
allowed_candidates -= &candidates;
Ok((candidates, Some(AttributeStartsWith(allowed_candidates))))
},
AttributeStartsWith(mut allowed_candidates) => {
let mut candidates = RoaringBitmap::new();

View file

@ -78,7 +78,7 @@ pub trait Context<'c> {
fn word_position_last_level(&self, word: &str, in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>>;
fn synonyms(&self, word: &str) -> heed::Result<Option<Vec<Vec<String>>>>;
fn searchable_fields_ids(&self) -> heed::Result<Vec<FieldId>>;
fn field_id_len_docids(&self, field_id: FieldId, len: u32) -> heed::Result<Option<RoaringBitmap>>;
fn field_id_word_count_docids(&self, field_id: FieldId, word_count: u8) -> heed::Result<Option<RoaringBitmap>>;
fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> Result<Option<RoaringBitmap>, heed::Error>;
}
pub struct CriteriaBuilder<'t> {
@ -181,8 +181,9 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
}
}
fn field_id_len_docids(&self, _field_id: FieldId, _len: u32) -> heed::Result<Option<RoaringBitmap>> {
Ok(None)
fn field_id_word_count_docids(&self, field_id: FieldId, word_count: u8) -> heed::Result<Option<RoaringBitmap>> {
let key = (field_id, word_count);
self.index.field_id_word_count_docids.get(self.rtxn, &key)
}
fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> Result<Option<RoaringBitmap>, heed::Error> {
@ -488,7 +489,7 @@ pub mod test {
todo!()
}
fn field_id_len_docids(&self, _field_id: FieldId, _len: u32) -> heed::Result<Option<RoaringBitmap>> {
fn field_id_word_count_docids(&self, _field_id: FieldId, _word_count: u8) -> heed::Result<Option<RoaringBitmap>> {
todo!()
}
}