mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Merge #308
308: Implement a better parallel indexer r=Kerollmops a=ManyTheFish Rewrite the indexer: - enhance memory consumption control - optimize parallelism using rayon and crossbeam channel - factorize the different parts and make new DB implementation easier - optimize and fix prefix databases Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
commit
5cbe879325
39 changed files with 2269 additions and 1868 deletions
|
@ -461,13 +461,18 @@ fn query_pair_proximity_docids(
|
|||
let prefix = right.prefix;
|
||||
match (&left.kind, &right.kind) {
|
||||
(QueryKind::Exact { word: left, .. }, QueryKind::Exact { word: right, .. }) => {
|
||||
if prefix && ctx.in_prefix_cache(&right) {
|
||||
Ok(ctx
|
||||
.word_prefix_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?
|
||||
.unwrap_or_default())
|
||||
} else if prefix {
|
||||
let r_words = word_derivations(&right, true, 0, ctx.words_fst(), wdcache)?;
|
||||
all_word_pair_proximity_docids(ctx, &[(left, 0)], &r_words, proximity)
|
||||
if prefix {
|
||||
match ctx.word_prefix_pair_proximity_docids(
|
||||
left.as_str(),
|
||||
right.as_str(),
|
||||
proximity,
|
||||
)? {
|
||||
Some(docids) => Ok(docids),
|
||||
None => {
|
||||
let r_words = word_derivations(&right, true, 0, ctx.words_fst(), wdcache)?;
|
||||
all_word_pair_proximity_docids(ctx, &[(left, 0)], &r_words, proximity)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Ok(ctx
|
||||
.word_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?
|
||||
|
@ -477,22 +482,24 @@ fn query_pair_proximity_docids(
|
|||
(QueryKind::Tolerant { typo, word: left }, QueryKind::Exact { word: right, .. }) => {
|
||||
let l_words =
|
||||
word_derivations(&left, false, *typo, ctx.words_fst(), wdcache)?.to_owned();
|
||||
if prefix && ctx.in_prefix_cache(&right) {
|
||||
if prefix {
|
||||
let mut docids = RoaringBitmap::new();
|
||||
for (left, _) in l_words {
|
||||
let current_docids = ctx
|
||||
.word_prefix_pair_proximity_docids(
|
||||
left.as_ref(),
|
||||
right.as_ref(),
|
||||
proximity,
|
||||
)?
|
||||
.unwrap_or_default();
|
||||
let current_docids = match ctx.word_prefix_pair_proximity_docids(
|
||||
left.as_str(),
|
||||
right.as_str(),
|
||||
proximity,
|
||||
)? {
|
||||
Some(docids) => Ok(docids),
|
||||
None => {
|
||||
let r_words =
|
||||
word_derivations(&right, true, 0, ctx.words_fst(), wdcache)?;
|
||||
all_word_pair_proximity_docids(ctx, &[(left, 0)], &r_words, proximity)
|
||||
}
|
||||
}?;
|
||||
docids |= current_docids;
|
||||
}
|
||||
Ok(docids)
|
||||
} else if prefix {
|
||||
let r_words = word_derivations(&right, true, 0, ctx.words_fst(), wdcache)?;
|
||||
all_word_pair_proximity_docids(ctx, &l_words, &r_words, proximity)
|
||||
} else {
|
||||
all_word_pair_proximity_docids(ctx, &l_words, &[(right, 0)], proximity)
|
||||
}
|
||||
|
|
|
@ -269,11 +269,7 @@ impl<'t> Iterator for FacetStringGroupRevRange<'t> {
|
|||
///
|
||||
/// It yields the facet string and the roaring bitmap associated with it.
|
||||
pub struct FacetStringLevelZeroRange<'t> {
|
||||
iter: RoRange<
|
||||
't,
|
||||
FacetStringLevelZeroCodec,
|
||||
FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>,
|
||||
>,
|
||||
iter: RoRange<'t, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>,
|
||||
}
|
||||
|
||||
impl<'t> FacetStringLevelZeroRange<'t> {
|
||||
|
@ -316,10 +312,7 @@ impl<'t> FacetStringLevelZeroRange<'t> {
|
|||
let iter = db
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.range(rtxn, &(left_bound, right_bound))?
|
||||
.remap_types::<
|
||||
FacetStringLevelZeroCodec,
|
||||
FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>
|
||||
>();
|
||||
.remap_types::<FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>();
|
||||
|
||||
Ok(FacetStringLevelZeroRange { iter })
|
||||
}
|
||||
|
@ -340,11 +333,7 @@ impl<'t> Iterator for FacetStringLevelZeroRange<'t> {
|
|||
}
|
||||
|
||||
pub struct FacetStringLevelZeroRevRange<'t> {
|
||||
iter: RoRevRange<
|
||||
't,
|
||||
FacetStringLevelZeroCodec,
|
||||
FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>,
|
||||
>,
|
||||
iter: RoRevRange<'t, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>,
|
||||
}
|
||||
|
||||
impl<'t> FacetStringLevelZeroRevRange<'t> {
|
||||
|
@ -387,10 +376,7 @@ impl<'t> FacetStringLevelZeroRevRange<'t> {
|
|||
let iter = db
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.rev_range(rtxn, &(left_bound, right_bound))?
|
||||
.remap_types::<
|
||||
FacetStringLevelZeroCodec,
|
||||
FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>
|
||||
>();
|
||||
.remap_types::<FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>();
|
||||
|
||||
Ok(FacetStringLevelZeroRevRange { iter })
|
||||
}
|
||||
|
|
|
@ -392,10 +392,7 @@ impl FilterCondition {
|
|||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
numbers_db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
|
||||
strings_db: heed::Database<
|
||||
FacetStringLevelZeroCodec,
|
||||
FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>,
|
||||
>,
|
||||
strings_db: heed::Database<FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>,
|
||||
field_id: FieldId,
|
||||
operator: &Operator,
|
||||
) -> Result<RoaringBitmap> {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue