mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-25 20:57:35 +01:00
Rename the BoRoaringBitmap codec
This commit is contained in:
parent
ac84db2506
commit
4eda149ffa
@ -21,7 +21,7 @@ use rayon::prelude::*;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
|
|
||||||
use milli::heed_codec::{CsvStringRecordCodec, ByteorderXRoaringBitmapCodec};
|
use milli::heed_codec::{CsvStringRecordCodec, BoRoaringBitmapCodec};
|
||||||
use milli::tokenizer::{simple_tokenizer, only_token};
|
use milli::tokenizer::{simple_tokenizer, only_token};
|
||||||
use milli::{SmallVec32, Index, Position, DocumentId, BEU32};
|
use milli::{SmallVec32, Index, Position, DocumentId, BEU32};
|
||||||
|
|
||||||
@ -364,7 +364,7 @@ impl Store {
|
|||||||
key.extend_from_slice(word.as_bytes());
|
key.extend_from_slice(word.as_bytes());
|
||||||
// We serialize the positions into a buffer.
|
// We serialize the positions into a buffer.
|
||||||
let positions = RoaringBitmap::from_iter(positions.iter().cloned());
|
let positions = RoaringBitmap::from_iter(positions.iter().cloned());
|
||||||
let bytes = ByteorderXRoaringBitmapCodec::bytes_encode(&positions)
|
let bytes = BoRoaringBitmapCodec::bytes_encode(&positions)
|
||||||
.with_context(|| format!("could not serialize positions"))?;
|
.with_context(|| format!("could not serialize positions"))?;
|
||||||
// that we write under the generated key into MTBL
|
// that we write under the generated key into MTBL
|
||||||
if lmdb_key_valid_size(&key) {
|
if lmdb_key_valid_size(&key) {
|
||||||
|
@ -336,13 +336,13 @@ fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::
|
|||||||
|
|
||||||
fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
||||||
use heed::types::DecodeIgnore;
|
use heed::types::DecodeIgnore;
|
||||||
use milli::ByteorderXRoaringBitmapCodec;
|
use milli::BoRoaringBitmapCodec;
|
||||||
|
|
||||||
let mut values_length = Vec::new();
|
let mut values_length = Vec::new();
|
||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
|
|
||||||
let db = index.docid_word_positions.as_polymorph();
|
let db = index.docid_word_positions.as_polymorph();
|
||||||
for result in db.iter::<_, DecodeIgnore, ByteorderXRoaringBitmapCodec>(rtxn)? {
|
for result in db.iter::<_, DecodeIgnore, BoRoaringBitmapCodec>(rtxn)? {
|
||||||
let ((), val) = result?;
|
let ((), val) = result?;
|
||||||
values_length.push(val.len() as u32);
|
values_length.push(val.len() as u32);
|
||||||
count += 1;
|
count += 1;
|
||||||
|
@ -2,9 +2,9 @@ use std::borrow::Cow;
|
|||||||
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
pub struct ByteorderXRoaringBitmapCodec;
|
pub struct BoRoaringBitmapCodec;
|
||||||
|
|
||||||
impl heed::BytesDecode<'_> for ByteorderXRoaringBitmapCodec {
|
impl heed::BytesDecode<'_> for BoRoaringBitmapCodec {
|
||||||
type DItem = RoaringBitmap;
|
type DItem = RoaringBitmap;
|
||||||
|
|
||||||
fn bytes_decode(mut bytes: &[u8]) -> Option<Self::DItem> {
|
fn bytes_decode(mut bytes: &[u8]) -> Option<Self::DItem> {
|
||||||
@ -16,7 +16,7 @@ impl heed::BytesDecode<'_> for ByteorderXRoaringBitmapCodec {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl heed::BytesEncode<'_> for ByteorderXRoaringBitmapCodec {
|
impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
|
||||||
type EItem = RoaringBitmap;
|
type EItem = RoaringBitmap;
|
||||||
|
|
||||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
@ -1,11 +1,11 @@
|
|||||||
mod beu32_str_codec;
|
mod beu32_str_codec;
|
||||||
mod byteorder_x_roaring_bitmap_codec;
|
mod bo_roaring_bitmap_codec;
|
||||||
mod csv_string_record_codec;
|
mod csv_string_record_codec;
|
||||||
mod roaring_bitmap_codec;
|
mod roaring_bitmap_codec;
|
||||||
mod str_str_u8_codec;
|
mod str_str_u8_codec;
|
||||||
|
|
||||||
pub use self::beu32_str_codec::BEU32StrCodec;
|
pub use self::beu32_str_codec::BEU32StrCodec;
|
||||||
pub use self::byteorder_x_roaring_bitmap_codec::ByteorderXRoaringBitmapCodec;
|
pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
|
||||||
pub use self::csv_string_record_codec::CsvStringRecordCodec;
|
pub use self::csv_string_record_codec::CsvStringRecordCodec;
|
||||||
pub use self::roaring_bitmap_codec::RoaringBitmapCodec;
|
pub use self::roaring_bitmap_codec::RoaringBitmapCodec;
|
||||||
pub use self::str_str_u8_codec::StrStrU8Codec;
|
pub use self::str_str_u8_codec::StrStrU8Codec;
|
||||||
|
@ -18,7 +18,7 @@ pub use self::search::{Search, SearchResult};
|
|||||||
pub use self::criterion::{Criterion, default_criteria};
|
pub use self::criterion::{Criterion, default_criteria};
|
||||||
pub use self::heed_codec::{
|
pub use self::heed_codec::{
|
||||||
RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec,
|
RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec,
|
||||||
CsvStringRecordCodec, ByteorderXRoaringBitmapCodec,
|
CsvStringRecordCodec, BoRoaringBitmapCodec,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
||||||
@ -42,7 +42,7 @@ pub struct Index {
|
|||||||
/// A word and all the documents ids containing the word.
|
/// A word and all the documents ids containing the word.
|
||||||
pub word_docids: Database<Str, RoaringBitmapCodec>,
|
pub word_docids: Database<Str, RoaringBitmapCodec>,
|
||||||
/// Maps a word and a document id (u32) to all the positions where the given word appears.
|
/// Maps a word and a document id (u32) to all the positions where the given word appears.
|
||||||
pub docid_word_positions: Database<BEU32StrCodec, ByteorderXRoaringBitmapCodec>,
|
pub docid_word_positions: Database<BEU32StrCodec, BoRoaringBitmapCodec>,
|
||||||
/// Maps the proximity between a pair of words with all the docids where this relation appears.
|
/// Maps the proximity between a pair of words with all the docids where this relation appears.
|
||||||
pub word_pair_proximity_docids: Database<StrStrU8Codec, RoaringBitmapCodec>,
|
pub word_pair_proximity_docids: Database<StrStrU8Codec, RoaringBitmapCodec>,
|
||||||
/// Maps the document id to the document as a CSV line.
|
/// Maps the document id to the document as a CSV line.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user