mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 05:14:27 +01:00
Create a new database on index and add a specialized codec for it
This commit is contained in:
parent
bfb1f9279b
commit
c45d1e3610
@ -8,6 +8,7 @@ mod roaring_bitmap_length;
|
|||||||
mod str_beu32_codec;
|
mod str_beu32_codec;
|
||||||
mod str_ref;
|
mod str_ref;
|
||||||
mod str_str_u8_codec;
|
mod str_str_u8_codec;
|
||||||
|
mod script_language_codec;
|
||||||
|
|
||||||
pub use byte_slice_ref::ByteSliceRefCodec;
|
pub use byte_slice_ref::ByteSliceRefCodec;
|
||||||
pub use str_ref::StrRefCodec;
|
pub use str_ref::StrRefCodec;
|
||||||
@ -21,3 +22,4 @@ pub use self::roaring_bitmap_length::{
|
|||||||
};
|
};
|
||||||
pub use self::str_beu32_codec::StrBEU32Codec;
|
pub use self::str_beu32_codec::StrBEU32Codec;
|
||||||
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
|
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
|
||||||
|
pub use self::script_language_codec::ScriptLanguageCodec;
|
||||||
|
43
milli/src/heed_codec/script_language_codec.rs
Normal file
43
milli/src/heed_codec/script_language_codec.rs
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
use std::mem::size_of;
|
||||||
|
use std::str;
|
||||||
|
|
||||||
|
use charabia::{Language, Script};
|
||||||
|
|
||||||
|
/// `heed` codec that converts a `(Script, Language)` pair to and from bytes
/// built from the names of the script and of the language.
pub struct ScriptLanguageCodec;
|
||||||
|
|
||||||
|
impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec {
|
||||||
|
type DItem = (Script, Language);
|
||||||
|
|
||||||
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
let footer_len = size_of::<u32>();
|
||||||
|
|
||||||
|
if bytes.len() < footer_len {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let (script, bytes) = bytes.split_at(bytes.len() - footer_len);
|
||||||
|
let script = str::from_utf8(script).ok()?;
|
||||||
|
let script_name = Script::from_name(script);
|
||||||
|
let lan = str::from_utf8(bytes).ok()?;
|
||||||
|
let lan_name = Language::from_name(lan);
|
||||||
|
|
||||||
|
Some((script_name, lan_name))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
|
||||||
|
type EItem = (Script, Language);
|
||||||
|
|
||||||
|
fn bytes_encode((script, lan): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||||
|
let script_name = script.name();
|
||||||
|
let lan_name = lan.name();
|
||||||
|
|
||||||
|
let mut bytes = Vec::with_capacity(script_name.len() + lan_name.len());
|
||||||
|
bytes.extend_from_slice(script_name.as_bytes());
|
||||||
|
bytes.extend_from_slice(lan_name.as_bytes());
|
||||||
|
|
||||||
|
Some(Cow::Owned(bytes))
|
||||||
|
}
|
||||||
|
}
|
@ -14,6 +14,7 @@ use time::OffsetDateTime;
|
|||||||
use crate::error::{InternalError, UserError};
|
use crate::error::{InternalError, UserError};
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::fields_ids_map::FieldsIdsMap;
|
use crate::fields_ids_map::FieldsIdsMap;
|
||||||
|
use crate::heed_codec::ScriptLanguageCodec;
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
||||||
FieldIdCodec, OrderedF64Codec,
|
FieldIdCodec, OrderedF64Codec,
|
||||||
@ -83,6 +84,7 @@ pub mod db_name {
|
|||||||
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
|
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
|
||||||
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
|
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
|
||||||
pub const DOCUMENTS: &str = "documents";
|
pub const DOCUMENTS: &str = "documents";
|
||||||
|
pub const SCRIPT_LANGUAGE_DOCIDS: &str = "script_language_docids";
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@ -122,6 +124,9 @@ pub struct Index {
|
|||||||
/// Maps the position of a word prefix with all the docids where this prefix appears.
|
/// Maps the position of a word prefix with all the docids where this prefix appears.
|
||||||
pub word_prefix_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>,
|
pub word_prefix_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>,
|
||||||
|
|
||||||
|
/// Maps the script and language with all the docids that correspond to it.
|
||||||
|
pub script_language_docids: Database<ScriptLanguageCodec, RoaringBitmapCodec>,
|
||||||
|
|
||||||
/// Maps the facet field id and the docids for which this field exists
|
/// Maps the facet field id and the docids for which this field exists
|
||||||
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
||||||
|
|
||||||
@ -159,6 +164,7 @@ impl Index {
|
|||||||
let exact_word_prefix_docids = env.create_database(Some(EXACT_WORD_PREFIX_DOCIDS))?;
|
let exact_word_prefix_docids = env.create_database(Some(EXACT_WORD_PREFIX_DOCIDS))?;
|
||||||
let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?;
|
let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?;
|
||||||
let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
|
let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
|
||||||
|
let script_language_docids = env.create_database(Some(SCRIPT_LANGUAGE_DOCIDS))?;
|
||||||
let word_prefix_pair_proximity_docids =
|
let word_prefix_pair_proximity_docids =
|
||||||
env.create_database(Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?;
|
env.create_database(Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?;
|
||||||
let prefix_word_pair_proximity_docids =
|
let prefix_word_pair_proximity_docids =
|
||||||
@ -186,6 +192,7 @@ impl Index {
|
|||||||
exact_word_prefix_docids,
|
exact_word_prefix_docids,
|
||||||
docid_word_positions,
|
docid_word_positions,
|
||||||
word_pair_proximity_docids,
|
word_pair_proximity_docids,
|
||||||
|
script_language_docids,
|
||||||
word_prefix_pair_proximity_docids,
|
word_prefix_pair_proximity_docids,
|
||||||
prefix_word_pair_proximity_docids,
|
prefix_word_pair_proximity_docids,
|
||||||
word_position_docids,
|
word_position_docids,
|
||||||
|
Loading…
Reference in New Issue
Block a user