308: Implement a better parallel indexer r=Kerollmops a=ManyTheFish

Rewrite the indexer:
- improve control over memory consumption
- optimize parallelism using rayon and crossbeam channels
- refactor the different parts to make implementing a new DB easier
- optimize and fix prefix databases


Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
bors[bot] 2021-09-02 15:03:52 +00:00 committed by GitHub
commit 5cbe879325
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
39 changed files with 2269 additions and 1868 deletions

View file

@ -461,13 +461,18 @@ fn query_pair_proximity_docids(
let prefix = right.prefix;
match (&left.kind, &right.kind) {
(QueryKind::Exact { word: left, .. }, QueryKind::Exact { word: right, .. }) => {
if prefix && ctx.in_prefix_cache(&right) {
Ok(ctx
.word_prefix_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?
.unwrap_or_default())
} else if prefix {
let r_words = word_derivations(&right, true, 0, ctx.words_fst(), wdcache)?;
all_word_pair_proximity_docids(ctx, &[(left, 0)], &r_words, proximity)
if prefix {
match ctx.word_prefix_pair_proximity_docids(
left.as_str(),
right.as_str(),
proximity,
)? {
Some(docids) => Ok(docids),
None => {
let r_words = word_derivations(&right, true, 0, ctx.words_fst(), wdcache)?;
all_word_pair_proximity_docids(ctx, &[(left, 0)], &r_words, proximity)
}
}
} else {
Ok(ctx
.word_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?
@ -477,22 +482,24 @@ fn query_pair_proximity_docids(
(QueryKind::Tolerant { typo, word: left }, QueryKind::Exact { word: right, .. }) => {
let l_words =
word_derivations(&left, false, *typo, ctx.words_fst(), wdcache)?.to_owned();
if prefix && ctx.in_prefix_cache(&right) {
if prefix {
let mut docids = RoaringBitmap::new();
for (left, _) in l_words {
let current_docids = ctx
.word_prefix_pair_proximity_docids(
left.as_ref(),
right.as_ref(),
proximity,
)?
.unwrap_or_default();
let current_docids = match ctx.word_prefix_pair_proximity_docids(
left.as_str(),
right.as_str(),
proximity,
)? {
Some(docids) => Ok(docids),
None => {
let r_words =
word_derivations(&right, true, 0, ctx.words_fst(), wdcache)?;
all_word_pair_proximity_docids(ctx, &[(left, 0)], &r_words, proximity)
}
}?;
docids |= current_docids;
}
Ok(docids)
} else if prefix {
let r_words = word_derivations(&right, true, 0, ctx.words_fst(), wdcache)?;
all_word_pair_proximity_docids(ctx, &l_words, &r_words, proximity)
} else {
all_word_pair_proximity_docids(ctx, &l_words, &[(right, 0)], proximity)
}

View file

@ -269,11 +269,7 @@ impl<'t> Iterator for FacetStringGroupRevRange<'t> {
///
/// It yields the facet string and the roaring bitmap associated with it.
pub struct FacetStringLevelZeroRange<'t> {
iter: RoRange<
't,
FacetStringLevelZeroCodec,
FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>,
>,
iter: RoRange<'t, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>,
}
impl<'t> FacetStringLevelZeroRange<'t> {
@ -316,10 +312,7 @@ impl<'t> FacetStringLevelZeroRange<'t> {
let iter = db
.remap_key_type::<ByteSlice>()
.range(rtxn, &(left_bound, right_bound))?
.remap_types::<
FacetStringLevelZeroCodec,
FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>
>();
.remap_types::<FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>();
Ok(FacetStringLevelZeroRange { iter })
}
@ -340,11 +333,7 @@ impl<'t> Iterator for FacetStringLevelZeroRange<'t> {
}
pub struct FacetStringLevelZeroRevRange<'t> {
iter: RoRevRange<
't,
FacetStringLevelZeroCodec,
FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>,
>,
iter: RoRevRange<'t, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>,
}
impl<'t> FacetStringLevelZeroRevRange<'t> {
@ -387,10 +376,7 @@ impl<'t> FacetStringLevelZeroRevRange<'t> {
let iter = db
.remap_key_type::<ByteSlice>()
.rev_range(rtxn, &(left_bound, right_bound))?
.remap_types::<
FacetStringLevelZeroCodec,
FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>
>();
.remap_types::<FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>();
Ok(FacetStringLevelZeroRevRange { iter })
}

View file

@ -392,10 +392,7 @@ impl FilterCondition {
rtxn: &heed::RoTxn,
index: &Index,
numbers_db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
strings_db: heed::Database<
FacetStringLevelZeroCodec,
FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>,
>,
strings_db: heed::Database<FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>,
field_id: FieldId,
operator: &Operator,
) -> Result<RoaringBitmap> {