mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 05:54:30 +01:00
Simplify facet update after removing Index::faceted_documents_ids
This commit is contained in:
parent
14832cb324
commit
59f88c14b3
@ -1,7 +1,6 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::mem::size_of;
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use charabia::{Language, Script};
|
use charabia::{Language, Script};
|
||||||
@ -14,7 +13,6 @@ use time::OffsetDateTime;
|
|||||||
|
|
||||||
use crate::distance::NDotProductPoint;
|
use crate::distance::NDotProductPoint;
|
||||||
use crate::error::{InternalError, UserError};
|
use crate::error::{InternalError, UserError};
|
||||||
use crate::facet::FacetType;
|
|
||||||
use crate::fields_ids_map::FieldsIdsMap;
|
use crate::fields_ids_map::FieldsIdsMap;
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
use crate::facet::FacetType;
|
|
||||||
use crate::{ExternalDocumentsIds, FieldDistribution, Index, Result};
|
use crate::{ExternalDocumentsIds, FieldDistribution, Index, Result};
|
||||||
|
|
||||||
pub struct ClearDocuments<'t, 'u, 'i> {
|
pub struct ClearDocuments<'t, 'u, 'i> {
|
||||||
@ -51,7 +50,6 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||||||
|
|
||||||
// We retrieve the number of documents ids that we are deleting.
|
// We retrieve the number of documents ids that we are deleting.
|
||||||
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
||||||
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
|
||||||
|
|
||||||
// We clean some of the main engine datastructures.
|
// We clean some of the main engine datastructures.
|
||||||
self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
|
self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
|
||||||
|
@ -1,8 +1,7 @@
|
|||||||
use std::borrow::Cow;
|
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::BufReader;
|
use std::io::BufReader;
|
||||||
|
|
||||||
use grenad::{CompressionType, Reader};
|
use grenad::CompressionType;
|
||||||
use heed::types::ByteSlice;
|
use heed::types::ByteSlice;
|
||||||
use heed::{BytesEncode, Error, RoTxn, RwTxn};
|
use heed::{BytesEncode, Error, RoTxn, RwTxn};
|
||||||
use obkv::KvReader;
|
use obkv::KvReader;
|
||||||
@ -82,10 +81,7 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
|
|
||||||
let inner = FacetsUpdateBulkInner { db, delta_data, group_size, min_level_size };
|
let inner = FacetsUpdateBulkInner { db, delta_data, group_size, min_level_size };
|
||||||
|
|
||||||
inner.update(wtxn, &field_ids, |wtxn, field_id, all_docids| {
|
inner.update(wtxn, &field_ids)?;
|
||||||
// TODO: remove the lambda altogether
|
|
||||||
Ok(())
|
|
||||||
})?;
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -99,21 +95,14 @@ pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
|
|||||||
pub min_level_size: u8,
|
pub min_level_size: u8,
|
||||||
}
|
}
|
||||||
impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||||
pub fn update(
|
pub fn update(mut self, wtxn: &mut RwTxn, field_ids: &[u16]) -> Result<()> {
|
||||||
mut self,
|
|
||||||
wtxn: &mut RwTxn,
|
|
||||||
field_ids: &[u16],
|
|
||||||
mut handle_all_docids: impl FnMut(&mut RwTxn, FieldId, RoaringBitmap) -> Result<()>,
|
|
||||||
) -> Result<()> {
|
|
||||||
self.update_level0(wtxn)?;
|
self.update_level0(wtxn)?;
|
||||||
for &field_id in field_ids.iter() {
|
for &field_id in field_ids.iter() {
|
||||||
self.clear_levels(wtxn, field_id)?;
|
self.clear_levels(wtxn, field_id)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
for &field_id in field_ids.iter() {
|
for &field_id in field_ids.iter() {
|
||||||
let (level_readers, all_docids) = self.compute_levels_for_field_id(field_id, wtxn)?;
|
let level_readers = self.compute_levels_for_field_id(field_id, wtxn)?;
|
||||||
|
|
||||||
handle_all_docids(wtxn, field_id, all_docids)?;
|
|
||||||
|
|
||||||
for level_reader in level_readers {
|
for level_reader in level_readers {
|
||||||
let mut cursor = level_reader.into_cursor()?;
|
let mut cursor = level_reader.into_cursor()?;
|
||||||
@ -201,16 +190,10 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
|||||||
&self,
|
&self,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
txn: &RoTxn,
|
txn: &RoTxn,
|
||||||
) -> Result<(Vec<grenad::Reader<BufReader<File>>>, RoaringBitmap)> {
|
) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
|
||||||
let mut all_docids = RoaringBitmap::new();
|
let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |_, _| Ok(()))?;
|
||||||
let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |bitmaps, _| {
|
|
||||||
for bitmap in bitmaps {
|
|
||||||
all_docids |= bitmap;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
})?;
|
|
||||||
|
|
||||||
Ok((subwriters, all_docids))
|
Ok(subwriters)
|
||||||
}
|
}
|
||||||
#[allow(clippy::type_complexity)]
|
#[allow(clippy::type_complexity)]
|
||||||
fn read_level_0<'t>(
|
fn read_level_0<'t>(
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
use std::collections::HashMap;
|
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::BufReader;
|
use std::io::BufReader;
|
||||||
|
|
||||||
@ -15,7 +14,7 @@ use crate::heed_codec::ByteSliceRefCodec;
|
|||||||
use crate::search::facet::get_highest_level;
|
use crate::search::facet::get_highest_level;
|
||||||
use crate::update::del_add::DelAdd;
|
use crate::update::del_add::DelAdd;
|
||||||
use crate::update::index_documents::valid_lmdb_key;
|
use crate::update::index_documents::valid_lmdb_key;
|
||||||
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
use crate::{CboRoaringBitmapCodec, Index, Result};
|
||||||
|
|
||||||
enum InsertionResult {
|
enum InsertionResult {
|
||||||
InPlace,
|
InPlace,
|
||||||
@ -30,16 +29,14 @@ enum DeletionResult {
|
|||||||
|
|
||||||
/// Algorithm to incrementally insert and delete elememts into the
|
/// Algorithm to incrementally insert and delete elememts into the
|
||||||
/// `facet_id_(string/f64)_docids` databases.
|
/// `facet_id_(string/f64)_docids` databases.
|
||||||
pub struct FacetsUpdateIncremental<'i> {
|
pub struct FacetsUpdateIncremental {
|
||||||
index: &'i Index,
|
|
||||||
inner: FacetsUpdateIncrementalInner,
|
inner: FacetsUpdateIncrementalInner,
|
||||||
facet_type: FacetType,
|
|
||||||
delta_data: grenad::Reader<BufReader<File>>,
|
delta_data: grenad::Reader<BufReader<File>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'i> FacetsUpdateIncremental<'i> {
|
impl FacetsUpdateIncremental {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
index: &'i Index,
|
index: &Index,
|
||||||
facet_type: FacetType,
|
facet_type: FacetType,
|
||||||
delta_data: grenad::Reader<BufReader<File>>,
|
delta_data: grenad::Reader<BufReader<File>>,
|
||||||
group_size: u8,
|
group_size: u8,
|
||||||
@ -47,7 +44,6 @@ impl<'i> FacetsUpdateIncremental<'i> {
|
|||||||
max_group_size: u8,
|
max_group_size: u8,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
FacetsUpdateIncremental {
|
FacetsUpdateIncremental {
|
||||||
index,
|
|
||||||
inner: FacetsUpdateIncrementalInner {
|
inner: FacetsUpdateIncrementalInner {
|
||||||
db: match facet_type {
|
db: match facet_type {
|
||||||
FacetType::String => index
|
FacetType::String => index
|
||||||
@ -61,12 +57,11 @@ impl<'i> FacetsUpdateIncremental<'i> {
|
|||||||
max_group_size,
|
max_group_size,
|
||||||
min_level_size,
|
min_level_size,
|
||||||
},
|
},
|
||||||
facet_type,
|
|
||||||
delta_data,
|
delta_data,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(self, wtxn: &'i mut RwTxn) -> crate::Result<()> {
|
pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> {
|
||||||
let mut cursor = self.delta_data.into_cursor()?;
|
let mut cursor = self.delta_data.into_cursor()?;
|
||||||
while let Some((key, value)) = cursor.move_on_next()? {
|
while let Some((key, value)) = cursor.move_on_next()? {
|
||||||
if !valid_lmdb_key(key) {
|
if !valid_lmdb_key(key) {
|
||||||
|
@ -115,7 +115,6 @@ pub struct FacetsUpdate<'i> {
|
|||||||
min_level_size: u8,
|
min_level_size: u8,
|
||||||
}
|
}
|
||||||
impl<'i> FacetsUpdate<'i> {
|
impl<'i> FacetsUpdate<'i> {
|
||||||
// TODO grenad::Reader<Key, Obkv<DelAdd, RoaringBitmap>>
|
|
||||||
pub fn new(
|
pub fn new(
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
facet_type: FacetType,
|
facet_type: FacetType,
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
use std::borrow::Cow;
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
@ -11,9 +10,7 @@ use heed::types::ByteSlice;
|
|||||||
use heed::RwTxn;
|
use heed::RwTxn;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::helpers::{
|
use super::helpers::{self, merge_ignore_values, valid_lmdb_key, CursorClonableMmap};
|
||||||
self, merge_ignore_values, serialize_roaring_bitmap, valid_lmdb_key, CursorClonableMmap,
|
|
||||||
};
|
|
||||||
use super::{ClonableMmap, MergeFn};
|
use super::{ClonableMmap, MergeFn};
|
||||||
use crate::distance::NDotProductPoint;
|
use crate::distance::NDotProductPoint;
|
||||||
use crate::error::UserError;
|
use crate::error::UserError;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user