Sort the word docids to make intersections much faster

This commit is contained in:
Kerollmops 2020-09-07 22:38:49 +02:00
parent ad11c5fb3f
commit 072382fa61
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4

View File

@ -122,6 +122,10 @@ impl<'a> Search<'a> {
derived_words: &[(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)], derived_words: &[(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
) -> RoaringBitmap ) -> RoaringBitmap
{ {
// We sort the derived words by inverse popularity (fewest docids first); this way intersections are faster.
let mut derived_words: Vec<_> = derived_words.iter().collect();
derived_words.sort_unstable_by_key(|(_, docids)| docids.len());
// we do a union between all the docids of each of the derived words, // we do a union between all the docids of each of the derived words,
// we got N unions (the number of original query words), we then intersect them. // we got N unions (the number of original query words), we then intersect them.
let mut candidates = RoaringBitmap::new(); let mut candidates = RoaringBitmap::new();