mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 21:20:24 +01:00
Always do the intersections with the universe
This commit is contained in:
parent
50a7393c55
commit
0ca1a4e805
@ -46,36 +46,70 @@ pub struct DatabaseCache<'ctx> {
|
|||||||
pub word_prefix_fids: FxHashMap<Interned<String>, Vec<u16>>,
|
pub word_prefix_fids: FxHashMap<Interned<String>, Vec<u16>>,
|
||||||
}
|
}
|
||||||
impl<'ctx> DatabaseCache<'ctx> {
|
impl<'ctx> DatabaseCache<'ctx> {
|
||||||
fn get_value<'v, K1, KC, DC>(
|
fn get_value<'v, K1, KC>(
|
||||||
txn: &'ctx RoTxn<'_>,
|
txn: &'ctx RoTxn<'_>,
|
||||||
cache_key: K1,
|
cache_key: K1,
|
||||||
db_key: &'v KC::EItem,
|
db_key: &'v KC::EItem,
|
||||||
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
|
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
db: Database<KC, Bytes>,
|
db: Database<KC, Bytes>,
|
||||||
) -> Result<Option<DC::DItem>>
|
) -> Result<Option<RoaringBitmap>>
|
||||||
where
|
where
|
||||||
K1: Copy + Eq + Hash,
|
K1: Copy + Eq + Hash,
|
||||||
KC: BytesEncode<'v>,
|
KC: BytesEncode<'v>,
|
||||||
DC: BytesDecodeOwned,
|
|
||||||
{
|
{
|
||||||
if let Entry::Vacant(entry) = cache.entry(cache_key) {
|
if let Entry::Vacant(entry) = cache.entry(cache_key) {
|
||||||
let bitmap_ptr = db.get(txn, db_key)?.map(Cow::Borrowed);
|
let bitmap_ptr = db.get(txn, db_key)?.map(Cow::Borrowed);
|
||||||
entry.insert(bitmap_ptr);
|
entry.insert(bitmap_ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
match cache.get(&cache_key).unwrap() {
|
let bitmap_bytes = match cache.get(&cache_key).unwrap() {
|
||||||
Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes)
|
Some(Cow::Borrowed(bytes)) => bytes,
|
||||||
|
Some(Cow::Owned(bytes)) => bytes.as_slice(),
|
||||||
|
None => return Ok(None),
|
||||||
|
};
|
||||||
|
|
||||||
|
match (bitmap_bytes, universe) {
|
||||||
|
(bytes, Some(universe)) => {
|
||||||
|
CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
|
||||||
|
.map(Some)
|
||||||
|
.map_err(Into::into)
|
||||||
|
}
|
||||||
|
(bytes, None) => CboRoaringBitmapCodec::bytes_decode_owned(bytes)
|
||||||
.map(Some)
|
.map(Some)
|
||||||
.map_err(heed::Error::Decoding)
|
.map_err(heed::Error::Decoding)
|
||||||
.map_err(Into::into),
|
.map_err(Into::into),
|
||||||
Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes)
|
|
||||||
.map(Some)
|
|
||||||
.map_err(heed::Error::Decoding)
|
|
||||||
.map_err(Into::into),
|
|
||||||
None => Ok(None),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn get_value_length<'v, K1, KC>(
|
||||||
|
txn: &'ctx RoTxn<'_>,
|
||||||
|
cache_key: K1,
|
||||||
|
db_key: &'v KC::EItem,
|
||||||
|
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
|
||||||
|
db: Database<KC, Bytes>,
|
||||||
|
) -> Result<Option<u64>>
|
||||||
|
where
|
||||||
|
K1: Copy + Eq + Hash,
|
||||||
|
KC: BytesEncode<'v>,
|
||||||
|
{
|
||||||
|
if let Entry::Vacant(entry) = cache.entry(cache_key) {
|
||||||
|
let bitmap_ptr = db.get(txn, db_key)?.map(Cow::Borrowed);
|
||||||
|
entry.insert(bitmap_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
let bitmap_bytes = match cache.get(&cache_key).unwrap() {
|
||||||
|
Some(Cow::Borrowed(bytes)) => bytes,
|
||||||
|
Some(Cow::Owned(bytes)) => bytes.as_slice(),
|
||||||
|
None => return Ok(None),
|
||||||
|
};
|
||||||
|
|
||||||
|
CboRoaringBitmapLenCodec::bytes_decode_owned(bitmap_bytes)
|
||||||
|
.map(Some)
|
||||||
|
.map_err(heed::Error::Decoding)
|
||||||
|
.map_err(Into::into)
|
||||||
|
}
|
||||||
|
|
||||||
fn get_value_from_keys<'v, K1, KC, DC>(
|
fn get_value_from_keys<'v, K1, KC, DC>(
|
||||||
txn: &'ctx RoTxn<'_>,
|
txn: &'ctx RoTxn<'_>,
|
||||||
cache_key: K1,
|
cache_key: K1,
|
||||||
@ -137,11 +171,15 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn word_docids(&mut self, word: Word) -> Result<Option<RoaringBitmap>> {
|
pub fn word_docids(
|
||||||
|
&mut self,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
|
word: Word,
|
||||||
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
match word {
|
match word {
|
||||||
Word::Original(word) => {
|
Word::Original(word) => {
|
||||||
let exact = self.get_db_exact_word_docids(word)?;
|
let exact = self.get_db_exact_word_docids(universe, word)?;
|
||||||
let tolerant = self.get_db_word_docids(word)?;
|
let tolerant = self.get_db_word_docids(universe, word)?;
|
||||||
Ok(match (exact, tolerant) {
|
Ok(match (exact, tolerant) {
|
||||||
(None, None) => None,
|
(None, None) => None,
|
||||||
(None, Some(tolerant)) => Some(tolerant),
|
(None, Some(tolerant)) => Some(tolerant),
|
||||||
@ -153,12 +191,16 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
Word::Derived(word) => self.get_db_word_docids(word),
|
Word::Derived(word) => self.get_db_word_docids(universe, word),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Retrieve or insert the given value in the `word_docids` database.
|
/// Retrieve or insert the given value in the `word_docids` database.
|
||||||
fn get_db_word_docids(&mut self, word: Interned<String>) -> Result<Option<RoaringBitmap>> {
|
fn get_db_word_docids(
|
||||||
|
&mut self,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
|
word: Interned<String>,
|
||||||
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
match &self.restricted_fids {
|
match &self.restricted_fids {
|
||||||
Some(restricted_fids) => {
|
Some(restricted_fids) => {
|
||||||
let interned = self.word_interner.get(word).as_str();
|
let interned = self.word_interner.get(word).as_str();
|
||||||
@ -174,11 +216,12 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
merge_cbo_roaring_bitmaps,
|
merge_cbo_roaring_bitmaps,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
None => DatabaseCache::get_value::<_, _>(
|
||||||
self.txn,
|
self.txn,
|
||||||
word,
|
word,
|
||||||
self.word_interner.get(word).as_str(),
|
self.word_interner.get(word).as_str(),
|
||||||
&mut self.db_cache.word_docids,
|
&mut self.db_cache.word_docids,
|
||||||
|
universe,
|
||||||
self.index.word_docids.remap_data_type::<Bytes>(),
|
self.index.word_docids.remap_data_type::<Bytes>(),
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
@ -186,6 +229,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
|
|
||||||
fn get_db_exact_word_docids(
|
fn get_db_exact_word_docids(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
word: Interned<String>,
|
word: Interned<String>,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
match &self.restricted_fids {
|
match &self.restricted_fids {
|
||||||
@ -203,21 +247,26 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
merge_cbo_roaring_bitmaps,
|
merge_cbo_roaring_bitmaps,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
None => DatabaseCache::get_value::<_, _>(
|
||||||
self.txn,
|
self.txn,
|
||||||
word,
|
word,
|
||||||
self.word_interner.get(word).as_str(),
|
self.word_interner.get(word).as_str(),
|
||||||
&mut self.db_cache.exact_word_docids,
|
&mut self.db_cache.exact_word_docids,
|
||||||
|
universe,
|
||||||
self.index.exact_word_docids.remap_data_type::<Bytes>(),
|
self.index.exact_word_docids.remap_data_type::<Bytes>(),
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> {
|
pub fn word_prefix_docids(
|
||||||
|
&mut self,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
|
prefix: Word,
|
||||||
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
match prefix {
|
match prefix {
|
||||||
Word::Original(prefix) => {
|
Word::Original(prefix) => {
|
||||||
let exact = self.get_db_exact_word_prefix_docids(prefix)?;
|
let exact = self.get_db_exact_word_prefix_docids(universe, prefix)?;
|
||||||
let tolerant = self.get_db_word_prefix_docids(prefix)?;
|
let tolerant = self.get_db_word_prefix_docids(universe, prefix)?;
|
||||||
Ok(match (exact, tolerant) {
|
Ok(match (exact, tolerant) {
|
||||||
(None, None) => None,
|
(None, None) => None,
|
||||||
(None, Some(tolerant)) => Some(tolerant),
|
(None, Some(tolerant)) => Some(tolerant),
|
||||||
@ -229,13 +278,14 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
Word::Derived(prefix) => self.get_db_word_prefix_docids(prefix),
|
Word::Derived(prefix) => self.get_db_word_prefix_docids(universe, prefix),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Retrieve or insert the given value in the `word_prefix_docids` database.
|
/// Retrieve or insert the given value in the `word_prefix_docids` database.
|
||||||
fn get_db_word_prefix_docids(
|
fn get_db_word_prefix_docids(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
prefix: Interned<String>,
|
prefix: Interned<String>,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
match &self.restricted_fids {
|
match &self.restricted_fids {
|
||||||
@ -253,11 +303,12 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
merge_cbo_roaring_bitmaps,
|
merge_cbo_roaring_bitmaps,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
None => DatabaseCache::get_value::<_, _>(
|
||||||
self.txn,
|
self.txn,
|
||||||
prefix,
|
prefix,
|
||||||
self.word_interner.get(prefix).as_str(),
|
self.word_interner.get(prefix).as_str(),
|
||||||
&mut self.db_cache.word_prefix_docids,
|
&mut self.db_cache.word_prefix_docids,
|
||||||
|
universe,
|
||||||
self.index.word_prefix_docids.remap_data_type::<Bytes>(),
|
self.index.word_prefix_docids.remap_data_type::<Bytes>(),
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
@ -265,6 +316,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
|
|
||||||
fn get_db_exact_word_prefix_docids(
|
fn get_db_exact_word_prefix_docids(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
prefix: Interned<String>,
|
prefix: Interned<String>,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
match &self.restricted_fids {
|
match &self.restricted_fids {
|
||||||
@ -282,11 +334,12 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
merge_cbo_roaring_bitmaps,
|
merge_cbo_roaring_bitmaps,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
None => DatabaseCache::get_value::<_, _>(
|
||||||
self.txn,
|
self.txn,
|
||||||
prefix,
|
prefix,
|
||||||
self.word_interner.get(prefix).as_str(),
|
self.word_interner.get(prefix).as_str(),
|
||||||
&mut self.db_cache.exact_word_prefix_docids,
|
&mut self.db_cache.exact_word_prefix_docids,
|
||||||
|
universe,
|
||||||
self.index.exact_word_prefix_docids.remap_data_type::<Bytes>(),
|
self.index.exact_word_prefix_docids.remap_data_type::<Bytes>(),
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
@ -294,6 +347,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
|
|
||||||
pub fn get_db_word_pair_proximity_docids(
|
pub fn get_db_word_pair_proximity_docids(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
word1: Interned<String>,
|
word1: Interned<String>,
|
||||||
word2: Interned<String>,
|
word2: Interned<String>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
@ -320,8 +374,8 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
for fid in fids {
|
for fid in fids {
|
||||||
// for each field, intersect left word bitmap and right word bitmap,
|
// for each field, intersect left word bitmap and right word bitmap,
|
||||||
// then merge the result in a global bitmap before storing it in the cache.
|
// then merge the result in a global bitmap before storing it in the cache.
|
||||||
let word1_docids = self.get_db_word_fid_docids(word1, fid)?;
|
let word1_docids = self.get_db_word_fid_docids(universe, word1, fid)?;
|
||||||
let word2_docids = self.get_db_word_fid_docids(word2, fid)?;
|
let word2_docids = self.get_db_word_fid_docids(universe, word2, fid)?;
|
||||||
if let (Some(word1_docids), Some(word2_docids)) =
|
if let (Some(word1_docids), Some(word2_docids)) =
|
||||||
(word1_docids, word2_docids)
|
(word1_docids, word2_docids)
|
||||||
{
|
{
|
||||||
@ -341,7 +395,33 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
|
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
}
|
}
|
||||||
ProximityPrecision::ByWord => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
ProximityPrecision::ByWord => DatabaseCache::get_value::<_, _>(
|
||||||
|
self.txn,
|
||||||
|
(proximity, word1, word2),
|
||||||
|
&(
|
||||||
|
proximity,
|
||||||
|
self.word_interner.get(word1).as_str(),
|
||||||
|
self.word_interner.get(word2).as_str(),
|
||||||
|
),
|
||||||
|
&mut self.db_cache.word_pair_proximity_docids,
|
||||||
|
universe,
|
||||||
|
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_db_word_pair_proximity_docids_len(
|
||||||
|
&mut self,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
|
word1: Interned<String>,
|
||||||
|
word2: Interned<String>,
|
||||||
|
proximity: u8,
|
||||||
|
) -> Result<Option<u64>> {
|
||||||
|
match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
|
||||||
|
ProximityPrecision::ByAttribute => Ok(self
|
||||||
|
.get_db_word_pair_proximity_docids(universe, word1, word2, proximity)?
|
||||||
|
.map(|d| d.len())),
|
||||||
|
ProximityPrecision::ByWord => DatabaseCache::get_value_length::<_, _>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(proximity, word1, word2),
|
(proximity, word1, word2),
|
||||||
&(
|
&(
|
||||||
@ -355,34 +435,9 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_db_word_pair_proximity_docids_len(
|
|
||||||
&mut self,
|
|
||||||
word1: Interned<String>,
|
|
||||||
word2: Interned<String>,
|
|
||||||
proximity: u8,
|
|
||||||
) -> Result<Option<u64>> {
|
|
||||||
match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
|
|
||||||
ProximityPrecision::ByAttribute => Ok(self
|
|
||||||
.get_db_word_pair_proximity_docids(word1, word2, proximity)?
|
|
||||||
.map(|d| d.len())),
|
|
||||||
ProximityPrecision::ByWord => {
|
|
||||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
|
|
||||||
self.txn,
|
|
||||||
(proximity, word1, word2),
|
|
||||||
&(
|
|
||||||
proximity,
|
|
||||||
self.word_interner.get(word1).as_str(),
|
|
||||||
self.word_interner.get(word2).as_str(),
|
|
||||||
),
|
|
||||||
&mut self.db_cache.word_pair_proximity_docids,
|
|
||||||
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_db_word_prefix_pair_proximity_docids(
|
pub fn get_db_word_prefix_pair_proximity_docids(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
word1: Interned<String>,
|
word1: Interned<String>,
|
||||||
prefix2: Interned<String>,
|
prefix2: Interned<String>,
|
||||||
mut proximity: u8,
|
mut proximity: u8,
|
||||||
@ -409,8 +464,9 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
// for each field, intersect left word bitmap and right word bitmap,
|
// for each field, intersect left word bitmap and right word bitmap,
|
||||||
// then merge the result in a global bitmap before storing it in the cache.
|
// then merge the result in a global bitmap before storing it in the cache.
|
||||||
for fid in fids {
|
for fid in fids {
|
||||||
let word1_docids = self.get_db_word_fid_docids(word1, fid)?;
|
let word1_docids = self.get_db_word_fid_docids(universe, word1, fid)?;
|
||||||
let prefix2_docids = self.get_db_word_prefix_fid_docids(prefix2, fid)?;
|
let prefix2_docids =
|
||||||
|
self.get_db_word_prefix_fid_docids(universe, prefix2, fid)?;
|
||||||
if let (Some(word1_docids), Some(prefix2_docids)) =
|
if let (Some(word1_docids), Some(prefix2_docids)) =
|
||||||
(word1_docids, prefix2_docids)
|
(word1_docids, prefix2_docids)
|
||||||
{
|
{
|
||||||
@ -452,16 +508,18 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
|
|
||||||
pub fn get_db_prefix_word_pair_proximity_docids(
|
pub fn get_db_prefix_word_pair_proximity_docids(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
left_prefix: Interned<String>,
|
left_prefix: Interned<String>,
|
||||||
right: Interned<String>,
|
right: Interned<String>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
// only accept exact matches on reversed positions
|
// only accept exact matches on reversed positions
|
||||||
self.get_db_word_pair_proximity_docids(left_prefix, right, proximity)
|
self.get_db_word_pair_proximity_docids(universe, left_prefix, right, proximity)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_db_word_fid_docids(
|
pub fn get_db_word_fid_docids(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
word: Interned<String>,
|
word: Interned<String>,
|
||||||
fid: u16,
|
fid: u16,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
@ -470,17 +528,19 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
DatabaseCache::get_value::<_, _>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(word, fid),
|
(word, fid),
|
||||||
&(self.word_interner.get(word).as_str(), fid),
|
&(self.word_interner.get(word).as_str(), fid),
|
||||||
&mut self.db_cache.word_fid_docids,
|
&mut self.db_cache.word_fid_docids,
|
||||||
|
universe,
|
||||||
self.index.word_fid_docids.remap_data_type::<Bytes>(),
|
self.index.word_fid_docids.remap_data_type::<Bytes>(),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_db_word_prefix_fid_docids(
|
pub fn get_db_word_prefix_fid_docids(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
word_prefix: Interned<String>,
|
word_prefix: Interned<String>,
|
||||||
fid: u16,
|
fid: u16,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
@ -489,11 +549,12 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
DatabaseCache::get_value::<_, _>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(word_prefix, fid),
|
(word_prefix, fid),
|
||||||
&(self.word_interner.get(word_prefix).as_str(), fid),
|
&(self.word_interner.get(word_prefix).as_str(), fid),
|
||||||
&mut self.db_cache.word_prefix_fid_docids,
|
&mut self.db_cache.word_prefix_fid_docids,
|
||||||
|
universe,
|
||||||
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
|
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -554,28 +615,32 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
|
|
||||||
pub fn get_db_word_position_docids(
|
pub fn get_db_word_position_docids(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
word: Interned<String>,
|
word: Interned<String>,
|
||||||
position: u16,
|
position: u16,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
DatabaseCache::get_value::<_, _>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(word, position),
|
(word, position),
|
||||||
&(self.word_interner.get(word).as_str(), position),
|
&(self.word_interner.get(word).as_str(), position),
|
||||||
&mut self.db_cache.word_position_docids,
|
&mut self.db_cache.word_position_docids,
|
||||||
|
universe,
|
||||||
self.index.word_position_docids.remap_data_type::<Bytes>(),
|
self.index.word_position_docids.remap_data_type::<Bytes>(),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_db_word_prefix_position_docids(
|
pub fn get_db_word_prefix_position_docids(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
word_prefix: Interned<String>,
|
word_prefix: Interned<String>,
|
||||||
position: u16,
|
position: u16,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
DatabaseCache::get_value::<_, _>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(word_prefix, position),
|
(word_prefix, position),
|
||||||
&(self.word_interner.get(word_prefix).as_str(), position),
|
&(self.word_interner.get(word_prefix).as_str(), position),
|
||||||
&mut self.db_cache.word_prefix_position_docids,
|
&mut self.db_cache.word_prefix_position_docids,
|
||||||
|
universe,
|
||||||
self.index.word_prefix_position_docids.remap_data_type::<Bytes>(),
|
self.index.word_prefix_position_docids.remap_data_type::<Bytes>(),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -171,9 +171,10 @@ impl State {
|
|||||||
// Note: Since the position is stored bucketed in word_position_docids, for queries with a lot of
|
// Note: Since the position is stored bucketed in word_position_docids, for queries with a lot of
|
||||||
// longer phrases we'll be losing precision here.
|
// longer phrases we'll be losing precision here.
|
||||||
let bucketed_position = crate::bucketed_position(position + offset);
|
let bucketed_position = crate::bucketed_position(position + offset);
|
||||||
let word_position_docids =
|
let word_position_docids = ctx
|
||||||
ctx.get_db_word_position_docids(*word, bucketed_position)?.unwrap_or_default()
|
.get_db_word_position_docids(Some(universe), *word, bucketed_position)?
|
||||||
& universe;
|
.unwrap_or_default()
|
||||||
|
& universe;
|
||||||
candidates &= word_position_docids;
|
candidates &= word_position_docids;
|
||||||
if candidates.is_empty() {
|
if candidates.is_empty() {
|
||||||
return Ok(State::Empty(query_graph.clone()));
|
return Ok(State::Empty(query_graph.clone()));
|
||||||
@ -199,7 +200,9 @@ impl State {
|
|||||||
// ignore stop words in phrases
|
// ignore stop words in phrases
|
||||||
.flatten()
|
.flatten()
|
||||||
.map(|word| -> Result<_> {
|
.map(|word| -> Result<_> {
|
||||||
Ok(ctx.get_db_word_fid_docids(*word, fid)?.unwrap_or_default())
|
Ok(ctx
|
||||||
|
.get_db_word_fid_docids(Some(universe), *word, fid)?
|
||||||
|
.unwrap_or_default())
|
||||||
}),
|
}),
|
||||||
)?;
|
)?;
|
||||||
intersection &= &candidates;
|
intersection &= &candidates;
|
||||||
|
@ -232,11 +232,12 @@ fn resolve_universe(
|
|||||||
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
|
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
|
||||||
fn resolve_negative_words(
|
fn resolve_negative_words(
|
||||||
ctx: &mut SearchContext<'_>,
|
ctx: &mut SearchContext<'_>,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
negative_words: &[Word],
|
negative_words: &[Word],
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
let mut negative_bitmap = RoaringBitmap::new();
|
let mut negative_bitmap = RoaringBitmap::new();
|
||||||
for &word in negative_words {
|
for &word in negative_words {
|
||||||
if let Some(bitmap) = ctx.word_docids(word)? {
|
if let Some(bitmap) = ctx.word_docids(universe, word)? {
|
||||||
negative_bitmap |= bitmap;
|
negative_bitmap |= bitmap;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -246,13 +247,14 @@ fn resolve_negative_words(
|
|||||||
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
|
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
|
||||||
fn resolve_negative_phrases(
|
fn resolve_negative_phrases(
|
||||||
ctx: &mut SearchContext<'_>,
|
ctx: &mut SearchContext<'_>,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
negative_phrases: &[LocatedQueryTerm],
|
negative_phrases: &[LocatedQueryTerm],
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
let mut negative_bitmap = RoaringBitmap::new();
|
let mut negative_bitmap = RoaringBitmap::new();
|
||||||
for term in negative_phrases {
|
for term in negative_phrases {
|
||||||
let query_term = ctx.term_interner.get(term.value);
|
let query_term = ctx.term_interner.get(term.value);
|
||||||
if let Some(phrase) = query_term.original_phrase() {
|
if let Some(phrase) = query_term.original_phrase() {
|
||||||
negative_bitmap |= ctx.get_phrase_docids(phrase)?;
|
negative_bitmap |= ctx.get_phrase_docids(universe, phrase)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(negative_bitmap)
|
Ok(negative_bitmap)
|
||||||
@ -686,8 +688,8 @@ pub fn execute_search(
|
|||||||
located_query_terms_from_tokens(ctx, tokens, words_limit)?;
|
located_query_terms_from_tokens(ctx, tokens, words_limit)?;
|
||||||
used_negative_operator = !negative_words.is_empty() || !negative_phrases.is_empty();
|
used_negative_operator = !negative_words.is_empty() || !negative_phrases.is_empty();
|
||||||
|
|
||||||
let ignored_documents = resolve_negative_words(ctx, &negative_words)?;
|
let ignored_documents = resolve_negative_words(ctx, Some(&universe), &negative_words)?;
|
||||||
let ignored_phrases = resolve_negative_phrases(ctx, &negative_phrases)?;
|
let ignored_phrases = resolve_negative_phrases(ctx, Some(&universe), &negative_phrases)?;
|
||||||
|
|
||||||
universe -= ignored_documents;
|
universe -= ignored_documents;
|
||||||
universe -= ignored_phrases;
|
universe -= ignored_phrases;
|
||||||
|
@ -417,7 +417,7 @@ fn split_best_frequency(
|
|||||||
let left = ctx.word_interner.insert(left.to_owned());
|
let left = ctx.word_interner.insert(left.to_owned());
|
||||||
let right = ctx.word_interner.insert(right.to_owned());
|
let right = ctx.word_interner.insert(right.to_owned());
|
||||||
|
|
||||||
if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(left, right, 1)? {
|
if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(None, left, right, 1)? {
|
||||||
if best.map_or(true, |(old, _, _)| frequency > old) {
|
if best.map_or(true, |(old, _, _)| frequency > old) {
|
||||||
best = Some((frequency, left, right));
|
best = Some((frequency, left, right));
|
||||||
}
|
}
|
||||||
|
@ -26,18 +26,15 @@ fn compute_docids(
|
|||||||
} else {
|
} else {
|
||||||
return Ok(Default::default());
|
return Ok(Default::default());
|
||||||
};
|
};
|
||||||
let mut candidates = match exact_term {
|
|
||||||
ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)?.clone(),
|
let candidates = match exact_term {
|
||||||
|
// TODO I move the intersection here
|
||||||
|
ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(Some(universe), phrase)? & universe,
|
||||||
ExactTerm::Word(word) => {
|
ExactTerm::Word(word) => {
|
||||||
if let Some(word_candidates) = ctx.word_docids(Word::Original(word))? {
|
ctx.word_docids(Some(universe), Word::Original(word))?.unwrap_or_default()
|
||||||
word_candidates
|
|
||||||
} else {
|
|
||||||
return Ok(Default::default());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
candidates &= universe;
|
|
||||||
Ok(candidates)
|
Ok(candidates)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -30,8 +30,12 @@ impl RankingRuleGraphTrait for FidGraph {
|
|||||||
|
|
||||||
let docids = if let Some(fid) = condition.fid {
|
let docids = if let Some(fid) = condition.fid {
|
||||||
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
|
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
|
||||||
let docids =
|
let docids = compute_query_term_subset_docids_within_field_id(
|
||||||
compute_query_term_subset_docids_within_field_id(ctx, &term.term_subset, fid)?;
|
ctx,
|
||||||
|
Some(universe),
|
||||||
|
&term.term_subset,
|
||||||
|
fid,
|
||||||
|
)?;
|
||||||
docids & universe
|
docids & universe
|
||||||
} else {
|
} else {
|
||||||
RoaringBitmap::new()
|
RoaringBitmap::new()
|
||||||
|
@ -33,6 +33,7 @@ impl RankingRuleGraphTrait for PositionGraph {
|
|||||||
docids |= universe
|
docids |= universe
|
||||||
& compute_query_term_subset_docids_within_position(
|
& compute_query_term_subset_docids_within_position(
|
||||||
ctx,
|
ctx,
|
||||||
|
Some(universe),
|
||||||
&term.term_subset,
|
&term.term_subset,
|
||||||
*position,
|
*position,
|
||||||
)?;
|
)?;
|
||||||
|
@ -74,10 +74,10 @@ pub fn compute_docids(
|
|||||||
if right_derivs.len() > 1 {
|
if right_derivs.len() > 1 {
|
||||||
let universe = &universe;
|
let universe = &universe;
|
||||||
if let Some(left_phrase) = left_phrase {
|
if let Some(left_phrase) = left_phrase {
|
||||||
if universe.is_disjoint(ctx.get_phrase_docids(left_phrase)?) {
|
if universe.is_disjoint(ctx.get_phrase_docids(Some(universe), left_phrase)?) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
} else if let Some(left_word_docids) = ctx.word_docids(left_word)? {
|
} else if let Some(left_word_docids) = ctx.word_docids(Some(universe), left_word)? {
|
||||||
if universe.is_disjoint(&left_word_docids) {
|
if universe.is_disjoint(&left_word_docids) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -123,7 +123,10 @@ fn compute_prefix_edges(
|
|||||||
|
|
||||||
let mut universe = universe.clone();
|
let mut universe = universe.clone();
|
||||||
if let Some(phrase) = left_phrase {
|
if let Some(phrase) = left_phrase {
|
||||||
let phrase_docids = ctx.get_phrase_docids(phrase)?;
|
// TODO we can clearly give the universe to this method
|
||||||
|
// Unfortunately, it is deserializing/computing stuff and
|
||||||
|
// keeping the result as a materialized bitmap.
|
||||||
|
let phrase_docids = ctx.get_phrase_docids(Some(&universe), phrase)?;
|
||||||
if !phrase_docids.is_empty() {
|
if !phrase_docids.is_empty() {
|
||||||
used_left_phrases.insert(phrase);
|
used_left_phrases.insert(phrase);
|
||||||
}
|
}
|
||||||
@ -133,9 +136,13 @@ fn compute_prefix_edges(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(new_docids) =
|
// TODO check that the universe changing on every call is not an issue, e.g. for caching.
|
||||||
ctx.get_db_word_prefix_pair_proximity_docids(left_word, right_prefix, forward_proximity)?
|
if let Some(new_docids) = ctx.get_db_word_prefix_pair_proximity_docids(
|
||||||
{
|
Some(&universe),
|
||||||
|
left_word,
|
||||||
|
right_prefix,
|
||||||
|
forward_proximity,
|
||||||
|
)? {
|
||||||
let new_docids = &universe & new_docids;
|
let new_docids = &universe & new_docids;
|
||||||
if !new_docids.is_empty() {
|
if !new_docids.is_empty() {
|
||||||
used_left_words.insert(left_word);
|
used_left_words.insert(left_word);
|
||||||
@ -147,6 +154,7 @@ fn compute_prefix_edges(
|
|||||||
// No swapping when computing the proximity between a phrase and a word
|
// No swapping when computing the proximity between a phrase and a word
|
||||||
if left_phrase.is_none() {
|
if left_phrase.is_none() {
|
||||||
if let Some(new_docids) = ctx.get_db_prefix_word_pair_proximity_docids(
|
if let Some(new_docids) = ctx.get_db_prefix_word_pair_proximity_docids(
|
||||||
|
Some(&universe),
|
||||||
right_prefix,
|
right_prefix,
|
||||||
left_word,
|
left_word,
|
||||||
backward_proximity,
|
backward_proximity,
|
||||||
@ -177,26 +185,29 @@ fn compute_non_prefix_edges(
|
|||||||
let mut universe = universe.clone();
|
let mut universe = universe.clone();
|
||||||
|
|
||||||
for phrase in left_phrase.iter().chain(right_phrase.iter()).copied() {
|
for phrase in left_phrase.iter().chain(right_phrase.iter()).copied() {
|
||||||
let phrase_docids = ctx.get_phrase_docids(phrase)?;
|
// TODO do the intersection in the method, again!
|
||||||
|
let phrase_docids = ctx.get_phrase_docids(Some(&universe), phrase)?;
|
||||||
universe &= phrase_docids;
|
universe &= phrase_docids;
|
||||||
if universe.is_empty() {
|
if universe.is_empty() {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO check that it is not an issue to alter the universe
|
||||||
if let Some(new_docids) =
|
if let Some(new_docids) =
|
||||||
ctx.get_db_word_pair_proximity_docids(word1, word2, forward_proximity)?
|
ctx.get_db_word_pair_proximity_docids(Some(&universe), word1, word2, forward_proximity)?
|
||||||
{
|
{
|
||||||
let new_docids = &universe & new_docids;
|
|
||||||
if !new_docids.is_empty() {
|
if !new_docids.is_empty() {
|
||||||
*docids |= new_docids;
|
*docids |= new_docids;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if backward_proximity >= 1 && left_phrase.is_none() && right_phrase.is_none() {
|
if backward_proximity >= 1 && left_phrase.is_none() && right_phrase.is_none() {
|
||||||
if let Some(new_docids) =
|
if let Some(new_docids) = ctx.get_db_word_pair_proximity_docids(
|
||||||
ctx.get_db_word_pair_proximity_docids(word2, word1, backward_proximity)?
|
Some(&universe),
|
||||||
{
|
word2,
|
||||||
let new_docids = &universe & new_docids;
|
word1,
|
||||||
|
backward_proximity,
|
||||||
|
)? {
|
||||||
if !new_docids.is_empty() {
|
if !new_docids.is_empty() {
|
||||||
*docids |= new_docids;
|
*docids |= new_docids;
|
||||||
}
|
}
|
||||||
|
@ -19,11 +19,16 @@ pub struct PhraseDocIdsCache {
|
|||||||
}
|
}
|
||||||
impl<'ctx> SearchContext<'ctx> {
|
impl<'ctx> SearchContext<'ctx> {
|
||||||
/// Get the document ids associated with the given phrase
|
/// Get the document ids associated with the given phrase
|
||||||
pub fn get_phrase_docids(&mut self, phrase: Interned<Phrase>) -> Result<&RoaringBitmap> {
|
pub fn get_phrase_docids(
|
||||||
|
&mut self,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
|
phrase: Interned<Phrase>,
|
||||||
|
) -> Result<&RoaringBitmap> {
|
||||||
if self.phrase_docids.cache.contains_key(&phrase) {
|
if self.phrase_docids.cache.contains_key(&phrase) {
|
||||||
return Ok(&self.phrase_docids.cache[&phrase]);
|
return Ok(&self.phrase_docids.cache[&phrase]);
|
||||||
};
|
};
|
||||||
let docids = compute_phrase_docids(self, phrase)?;
|
let docids = compute_phrase_docids(self, universe, phrase)?;
|
||||||
|
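// TODO can we improve that? There is an issue: the bitmap was computed against this call's universe, but we keep it in a cache keyed by phrase only, so a later call with a different universe reuses it.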
|
||||||
let _ = self.phrase_docids.cache.insert(phrase, docids);
|
let _ = self.phrase_docids.cache.insert(phrase, docids);
|
||||||
let docids = &self.phrase_docids.cache[&phrase];
|
let docids = &self.phrase_docids.cache[&phrase];
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
@ -35,17 +40,18 @@ pub fn compute_query_term_subset_docids(
|
|||||||
term: &QueryTermSubset,
|
term: &QueryTermSubset,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
|
// TODO use the MultiOps trait to do large intersections
|
||||||
for word in term.all_single_words_except_prefix_db(ctx)? {
|
for word in term.all_single_words_except_prefix_db(ctx)? {
|
||||||
if let Some(word_docids) = ctx.word_docids(word)? {
|
if let Some(word_docids) = ctx.word_docids(universe, word)? {
|
||||||
docids |= word_docids;
|
docids |= word_docids;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for phrase in term.all_phrases(ctx)? {
|
for phrase in term.all_phrases(ctx)? {
|
||||||
docids |= ctx.get_phrase_docids(phrase)?;
|
docids |= ctx.get_phrase_docids(universe, phrase)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(prefix) = term.use_prefix_db(ctx) {
|
if let Some(prefix) = term.use_prefix_db(ctx) {
|
||||||
if let Some(prefix_docids) = ctx.word_prefix_docids(prefix)? {
|
if let Some(prefix_docids) = ctx.word_prefix_docids(universe, prefix)? {
|
||||||
docids |= prefix_docids;
|
docids |= prefix_docids;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -58,12 +64,13 @@ pub fn compute_query_term_subset_docids(
|
|||||||
|
|
||||||
pub fn compute_query_term_subset_docids_within_field_id(
|
pub fn compute_query_term_subset_docids_within_field_id(
|
||||||
ctx: &mut SearchContext<'_>,
|
ctx: &mut SearchContext<'_>,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
term: &QueryTermSubset,
|
term: &QueryTermSubset,
|
||||||
fid: u16,
|
fid: u16,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
for word in term.all_single_words_except_prefix_db(ctx)? {
|
for word in term.all_single_words_except_prefix_db(ctx)? {
|
||||||
if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(word.interned(), fid)? {
|
if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(universe, word.interned(), fid)? {
|
||||||
docids |= word_fid_docids;
|
docids |= word_fid_docids;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -72,15 +79,15 @@ pub fn compute_query_term_subset_docids_within_field_id(
|
|||||||
// There may be false positives when resolving a phrase, so we're not
|
// There may be false positives when resolving a phrase, so we're not
|
||||||
// guaranteed that all of its words are within a single fid.
|
// guaranteed that all of its words are within a single fid.
|
||||||
if let Some(word) = phrase.words(ctx).iter().flatten().next() {
|
if let Some(word) = phrase.words(ctx).iter().flatten().next() {
|
||||||
if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(*word, fid)? {
|
if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(universe, *word, fid)? {
|
||||||
docids |= ctx.get_phrase_docids(phrase)? & word_fid_docids;
|
docids |= ctx.get_phrase_docids(Some(&word_fid_docids), phrase)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(word_prefix) = term.use_prefix_db(ctx) {
|
if let Some(word_prefix) = term.use_prefix_db(ctx) {
|
||||||
if let Some(word_fid_docids) =
|
if let Some(word_fid_docids) =
|
||||||
ctx.get_db_word_prefix_fid_docids(word_prefix.interned(), fid)?
|
ctx.get_db_word_prefix_fid_docids(universe, word_prefix.interned(), fid)?
|
||||||
{
|
{
|
||||||
docids |= word_fid_docids;
|
docids |= word_fid_docids;
|
||||||
}
|
}
|
||||||
@ -91,13 +98,14 @@ pub fn compute_query_term_subset_docids_within_field_id(
|
|||||||
|
|
||||||
pub fn compute_query_term_subset_docids_within_position(
|
pub fn compute_query_term_subset_docids_within_position(
|
||||||
ctx: &mut SearchContext<'_>,
|
ctx: &mut SearchContext<'_>,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
term: &QueryTermSubset,
|
term: &QueryTermSubset,
|
||||||
position: u16,
|
position: u16,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
for word in term.all_single_words_except_prefix_db(ctx)? {
|
for word in term.all_single_words_except_prefix_db(ctx)? {
|
||||||
if let Some(word_position_docids) =
|
if let Some(word_position_docids) =
|
||||||
ctx.get_db_word_position_docids(word.interned(), position)?
|
ctx.get_db_word_position_docids(universe, word.interned(), position)?
|
||||||
{
|
{
|
||||||
docids |= word_position_docids;
|
docids |= word_position_docids;
|
||||||
}
|
}
|
||||||
@ -107,15 +115,17 @@ pub fn compute_query_term_subset_docids_within_position(
|
|||||||
// It's difficult to know the expected position of the words in the phrase,
|
// It's difficult to know the expected position of the words in the phrase,
|
||||||
// so instead we just check the first one.
|
// so instead we just check the first one.
|
||||||
if let Some(word) = phrase.words(ctx).iter().flatten().next() {
|
if let Some(word) = phrase.words(ctx).iter().flatten().next() {
|
||||||
if let Some(word_position_docids) = ctx.get_db_word_position_docids(*word, position)? {
|
if let Some(word_position_docids) =
|
||||||
docids |= ctx.get_phrase_docids(phrase)? & word_position_docids
|
ctx.get_db_word_position_docids(universe, *word, position)?
|
||||||
|
{
|
||||||
|
docids |= ctx.get_phrase_docids(Some(&word_position_docids), phrase)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(word_prefix) = term.use_prefix_db(ctx) {
|
if let Some(word_prefix) = term.use_prefix_db(ctx) {
|
||||||
if let Some(word_position_docids) =
|
if let Some(word_position_docids) =
|
||||||
ctx.get_db_word_prefix_position_docids(word_prefix.interned(), position)?
|
ctx.get_db_word_prefix_position_docids(universe, word_prefix.interned(), position)?
|
||||||
{
|
{
|
||||||
docids |= word_position_docids;
|
docids |= word_position_docids;
|
||||||
}
|
}
|
||||||
@ -180,6 +190,7 @@ pub fn compute_query_graph_docids(
|
|||||||
|
|
||||||
pub fn compute_phrase_docids(
|
pub fn compute_phrase_docids(
|
||||||
ctx: &mut SearchContext<'_>,
|
ctx: &mut SearchContext<'_>,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
phrase: Interned<Phrase>,
|
phrase: Interned<Phrase>,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
let Phrase { words } = ctx.phrase_interner.get(phrase).clone();
|
let Phrase { words } = ctx.phrase_interner.get(phrase).clone();
|
||||||
@ -189,7 +200,7 @@ pub fn compute_phrase_docids(
|
|||||||
}
|
}
|
||||||
let mut candidates = RoaringBitmap::new();
|
let mut candidates = RoaringBitmap::new();
|
||||||
for word in words.iter().flatten().copied() {
|
for word in words.iter().flatten().copied() {
|
||||||
if let Some(word_docids) = ctx.word_docids(Word::Original(word))? {
|
if let Some(word_docids) = ctx.word_docids(universe, Word::Original(word))? {
|
||||||
candidates |= word_docids;
|
candidates |= word_docids;
|
||||||
} else {
|
} else {
|
||||||
return Ok(RoaringBitmap::new());
|
return Ok(RoaringBitmap::new());
|
||||||
@ -213,7 +224,7 @@ pub fn compute_phrase_docids(
|
|||||||
.filter_map(|(index, word)| word.as_ref().map(|word| (index, word)))
|
.filter_map(|(index, word)| word.as_ref().map(|word| (index, word)))
|
||||||
{
|
{
|
||||||
if dist == 0 {
|
if dist == 0 {
|
||||||
match ctx.get_db_word_pair_proximity_docids(s1, s2, 1)? {
|
match ctx.get_db_word_pair_proximity_docids(universe, s1, s2, 1)? {
|
||||||
Some(m) => bitmaps.push(m),
|
Some(m) => bitmaps.push(m),
|
||||||
// If there are no documents for this pair, there will be no
|
// If there are no documents for this pair, there will be no
|
||||||
// results for the phrase query.
|
// results for the phrase query.
|
||||||
@ -223,7 +234,7 @@ pub fn compute_phrase_docids(
|
|||||||
let mut bitmap = RoaringBitmap::new();
|
let mut bitmap = RoaringBitmap::new();
|
||||||
for dist in 0..=dist {
|
for dist in 0..=dist {
|
||||||
if let Some(m) =
|
if let Some(m) =
|
||||||
ctx.get_db_word_pair_proximity_docids(s1, s2, dist as u8 + 1)?
|
ctx.get_db_word_pair_proximity_docids(universe, s1, s2, dist as u8 + 1)?
|
||||||
{
|
{
|
||||||
bitmap |= m;
|
bitmap |= m;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user