mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-23 11:47:28 +01:00
Rename StrStrU8Codec to U8StrStrCodec and reorder its fields
This commit is contained in:
parent
bdeb47305e
commit
1dbbd8694f
@ -15,4 +15,4 @@ pub use self::roaring_bitmap_length::{
|
||||
BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
|
||||
};
|
||||
pub use self::str_beu32_codec::StrBEU32Codec;
|
||||
pub use self::str_str_u8_codec::{StrStrU8Codec, UncheckedStrStrU8Codec};
|
||||
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
|
||||
|
@ -1,10 +1,10 @@
|
||||
use std::borrow::Cow;
|
||||
use std::str;
|
||||
|
||||
pub struct StrStrU8Codec;
|
||||
pub struct U8StrStrCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for StrStrU8Codec {
|
||||
type DItem = (&'a str, &'a str, u8);
|
||||
impl<'a> heed::BytesDecode<'a> for U8StrStrCodec {
|
||||
type DItem = (u8, &'a str, &'a str);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (n, bytes) = bytes.split_first()?;
|
||||
@ -13,14 +13,14 @@ impl<'a> heed::BytesDecode<'a> for StrStrU8Codec {
|
||||
let s2_bytes = &rest[1..];
|
||||
let s1 = str::from_utf8(s1_bytes).ok()?;
|
||||
let s2 = str::from_utf8(s2_bytes).ok()?;
|
||||
Some((s1, s2, *n))
|
||||
Some((*n, s1, s2))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for StrStrU8Codec {
|
||||
type EItem = (&'a str, &'a str, u8);
|
||||
impl<'a> heed::BytesEncode<'a> for U8StrStrCodec {
|
||||
type EItem = (u8, &'a str, &'a str);
|
||||
|
||||
fn bytes_encode((s1, s2, n): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
|
||||
bytes.push(*n);
|
||||
bytes.extend_from_slice(s1.as_bytes());
|
||||
@ -29,24 +29,24 @@ impl<'a> heed::BytesEncode<'a> for StrStrU8Codec {
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
pub struct UncheckedStrStrU8Codec;
|
||||
pub struct UncheckedU8StrStrCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for UncheckedStrStrU8Codec {
|
||||
type DItem = (&'a [u8], &'a [u8], u8);
|
||||
impl<'a> heed::BytesDecode<'a> for UncheckedU8StrStrCodec {
|
||||
type DItem = (u8, &'a [u8], &'a [u8]);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (n, bytes) = bytes.split_first()?;
|
||||
let s1_end = bytes.iter().position(|b| *b == 0)?;
|
||||
let (s1_bytes, rest) = bytes.split_at(s1_end);
|
||||
let s2_bytes = &rest[1..];
|
||||
Some((s1_bytes, s2_bytes, *n))
|
||||
Some((*n, s1_bytes, s2_bytes))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for UncheckedStrStrU8Codec {
|
||||
type EItem = (&'a [u8], &'a [u8], u8);
|
||||
impl<'a> heed::BytesEncode<'a> for UncheckedU8StrStrCodec {
|
||||
type EItem = (u8, &'a [u8], &'a [u8]);
|
||||
|
||||
fn bytes_encode((s1, s2, n): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
|
||||
bytes.push(*n);
|
||||
bytes.extend_from_slice(s1);
|
||||
|
@ -21,7 +21,7 @@ use crate::{
|
||||
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
|
||||
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
|
||||
FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
|
||||
Search, StrBEU32Codec, StrStrU8Codec, BEU16, BEU32,
|
||||
Search, StrBEU32Codec, U8StrStrCodec, BEU16, BEU32,
|
||||
};
|
||||
|
||||
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
|
||||
@ -106,9 +106,9 @@ pub struct Index {
|
||||
pub docid_word_positions: Database<BEU32StrCodec, BoRoaringBitmapCodec>,
|
||||
|
||||
/// Maps the proximity between a pair of words with all the docids where this relation appears.
|
||||
pub word_pair_proximity_docids: Database<StrStrU8Codec, CboRoaringBitmapCodec>,
|
||||
pub word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
|
||||
/// Maps the proximity between a pair of word and prefix with all the docids where this relation appears.
|
||||
pub word_prefix_pair_proximity_docids: Database<StrStrU8Codec, CboRoaringBitmapCodec>,
|
||||
pub word_prefix_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Maps the word and the position with the docids that correspond to it.
|
||||
pub word_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>,
|
||||
|
@ -37,7 +37,7 @@ pub use self::fields_ids_map::FieldsIdsMap;
|
||||
pub use self::heed_codec::{
|
||||
BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapCodec,
|
||||
CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec, RoaringBitmapCodec,
|
||||
RoaringBitmapLenCodec, StrBEU32Codec, StrStrU8Codec, UncheckedStrStrU8Codec,
|
||||
RoaringBitmapLenCodec, StrBEU32Codec, U8StrStrCodec, UncheckedU8StrStrCodec,
|
||||
};
|
||||
pub use self::index::Index;
|
||||
pub use self::search::{
|
||||
|
@ -138,7 +138,7 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
|
||||
right: &str,
|
||||
proximity: u8,
|
||||
) -> heed::Result<Option<RoaringBitmap>> {
|
||||
let key = (left, right, proximity);
|
||||
let key = (proximity, left, right);
|
||||
self.index.word_pair_proximity_docids.get(self.rtxn, &key)
|
||||
}
|
||||
|
||||
@ -148,7 +148,7 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
|
||||
right: &str,
|
||||
proximity: u8,
|
||||
) -> heed::Result<Option<RoaringBitmap>> {
|
||||
let key = (left, right, proximity);
|
||||
let key = (proximity, left, right);
|
||||
self.index.word_prefix_pair_proximity_docids.get(self.rtxn, &key)
|
||||
}
|
||||
|
||||
|
@ -182,16 +182,16 @@ pub fn snap_docid_word_positions(index: &Index) -> String {
|
||||
}
|
||||
pub fn snap_word_pair_proximity_docids(index: &Index) -> String {
|
||||
let snap = make_db_snap_from_iter!(index, word_pair_proximity_docids, |(
|
||||
(word1, word2, proximity),
|
||||
(proximity, word1, word2),
|
||||
b,
|
||||
)| {
|
||||
&format!("{word1:<16} {word2:<16} {proximity:<2} {}", display_bitmap(&b))
|
||||
&format!("{proximity:<2} {word1:<16} {word2:<16} {}", display_bitmap(&b))
|
||||
});
|
||||
snap
|
||||
}
|
||||
pub fn snap_word_prefix_pair_proximity_docids(index: &Index) -> String {
|
||||
let snap = make_db_snap_from_iter!(index, word_prefix_pair_proximity_docids, |(
|
||||
(word1, prefix, proximity),
|
||||
(proximity, word1, prefix),
|
||||
b,
|
||||
)| {
|
||||
&format!("{proximity:<2} {word1:<16} {prefix:<4} {}", display_bitmap(&b))
|
||||
|
@ -177,7 +177,7 @@ use log::debug;
|
||||
use crate::update::index_documents::{
|
||||
create_writer, merge_cbo_roaring_bitmaps, CursorClonableMmap,
|
||||
};
|
||||
use crate::{CboRoaringBitmapCodec, Index, Result, UncheckedStrStrU8Codec};
|
||||
use crate::{CboRoaringBitmapCodec, Index, Result, UncheckedU8StrStrCodec};
|
||||
|
||||
pub struct WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
@ -259,9 +259,9 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
||||
&mut cursor,
|
||||
|cursor| {
|
||||
if let Some((key, value)) = cursor.move_on_next()? {
|
||||
let (word1, word2, proximity) = UncheckedStrStrU8Codec::bytes_decode(key)
|
||||
let (proximity, word1, word2) = UncheckedU8StrStrCodec::bytes_decode(key)
|
||||
.ok_or(heed::Error::Decoding)?;
|
||||
Ok(Some(((word1, word2, proximity), value)))
|
||||
Ok(Some(((proximity, word1, word2), value)))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
@ -293,7 +293,7 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
||||
let mut db_iter = self
|
||||
.index
|
||||
.word_pair_proximity_docids
|
||||
.remap_key_type::<UncheckedStrStrU8Codec>()
|
||||
.remap_key_type::<UncheckedU8StrStrCodec>()
|
||||
.remap_data_type::<ByteSlice>()
|
||||
.iter(self.wtxn)?;
|
||||
|
||||
@ -358,7 +358,7 @@ fn execute_on_word_pairs_and_prefixes<I>(
|
||||
mut next_word_pair_proximity: impl for<'a> FnMut(
|
||||
&'a mut I,
|
||||
) -> Result<
|
||||
Option<((&'a [u8], &'a [u8], u8), &'a [u8])>,
|
||||
Option<((u8, &'a [u8], &'a [u8]), &'a [u8])>,
|
||||
>,
|
||||
prefixes: &PrefixTrieNode,
|
||||
max_proximity: u8,
|
||||
@ -376,14 +376,14 @@ fn execute_on_word_pairs_and_prefixes<I>(
|
||||
let mut prefix_buffer = Vec::with_capacity(8);
|
||||
let mut merge_buffer = Vec::with_capacity(65_536);
|
||||
|
||||
while let Some(((word1, word2, proximity), data)) = next_word_pair_proximity(iter)? {
|
||||
while let Some(((proximity, word1, word2), data)) = next_word_pair_proximity(iter)? {
|
||||
// stop iterating as soon as the proximity is over the threshold
|
||||
if proximity > max_proximity {
|
||||
break;
|
||||
};
|
||||
let word2_start_different_than_prev = word2[0] != prev_word2_start;
|
||||
// if there were no potential prefixes for the previous word2 based on its first letter,
|
||||
// and if the current word2 starts with the same letter, then there is also no potential
|
||||
// and if the current word2 starts with the same letter, then there is also no potential
|
||||
// prefixes for the current word2, and we can skip to the next iteration
|
||||
if empty_prefixes && !word2_start_different_than_prev {
|
||||
continue;
|
||||
@ -683,7 +683,7 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::{db_snap, CboRoaringBitmapCodec, StrStrU8Codec};
|
||||
use crate::{db_snap, CboRoaringBitmapCodec, U8StrStrCodec};
|
||||
|
||||
fn documents_with_enough_different_words_for_prefixes(prefixes: &[&str]) -> Vec<crate::Object> {
|
||||
let mut documents = Vec::new();
|
||||
@ -858,40 +858,40 @@ mod tests {
|
||||
CboRoaringBitmapCodec::serialize_into(&bitmap_ranges, &mut serialised_bitmap_ranges);
|
||||
|
||||
let word_pairs = [
|
||||
(("healthy", "arbres", 1), &serialised_bitmap123),
|
||||
(("healthy", "boat", 1), &serialised_bitmap123),
|
||||
(("healthy", "ca", 1), &serialised_bitmap123),
|
||||
(("healthy", "cats", 1), &serialised_bitmap456),
|
||||
(("healthy", "cattos", 1), &serialised_bitmap123),
|
||||
(("jittery", "cat", 1), &serialised_bitmap123),
|
||||
(("jittery", "cata", 1), &serialised_bitmap456),
|
||||
(("jittery", "catb", 1), &serialised_bitmap789),
|
||||
(("jittery", "catc", 1), &serialised_bitmap_ranges),
|
||||
(("healthy", "arbre", 2), &serialised_bitmap123),
|
||||
(("healthy", "arbres", 2), &serialised_bitmap456),
|
||||
(("healthy", "cats", 2), &serialised_bitmap789),
|
||||
(("healthy", "cattos", 2), &serialised_bitmap_ranges),
|
||||
(("healthy", "arbre", 3), &serialised_bitmap456),
|
||||
(("healthy", "arbres", 3), &serialised_bitmap789),
|
||||
((1, "healthy", "arbres"), &serialised_bitmap123),
|
||||
((1, "healthy", "boat"), &serialised_bitmap123),
|
||||
((1, "healthy", "ca"), &serialised_bitmap123),
|
||||
((1, "healthy", "cats"), &serialised_bitmap456),
|
||||
((1, "healthy", "cattos"), &serialised_bitmap123),
|
||||
((1, "jittery", "cat"), &serialised_bitmap123),
|
||||
((1, "jittery", "cata"), &serialised_bitmap456),
|
||||
((1, "jittery", "catb"), &serialised_bitmap789),
|
||||
((1, "jittery", "catc"), &serialised_bitmap_ranges),
|
||||
((2, "healthy", "arbre"), &serialised_bitmap123),
|
||||
((2, "healthy", "arbres"), &serialised_bitmap456),
|
||||
((2, "healthy", "cats"), &serialised_bitmap789),
|
||||
((2, "healthy", "cattos"), &serialised_bitmap_ranges),
|
||||
((3, "healthy", "arbre"), &serialised_bitmap456),
|
||||
((3, "healthy", "arbres"), &serialised_bitmap789),
|
||||
];
|
||||
|
||||
let expected_result = [
|
||||
(("healthy", "arb", 1), bitmap123.clone()),
|
||||
(("healthy", "arbre", 1), bitmap123.clone()),
|
||||
(("healthy", "cat", 1), &bitmap456 | &bitmap123),
|
||||
(("healthy", "catto", 1), bitmap123.clone()),
|
||||
(("jittery", "cat", 1), (&bitmap123 | &bitmap456 | &bitmap789 | &bitmap_ranges)),
|
||||
(("healthy", "arb", 2), &bitmap123 | &bitmap456),
|
||||
(("healthy", "arbre", 2), &bitmap123 | &bitmap456),
|
||||
(("healthy", "cat", 2), &bitmap789 | &bitmap_ranges),
|
||||
(("healthy", "catto", 2), bitmap_ranges.clone()),
|
||||
((1, "healthy", "arb"), bitmap123.clone()),
|
||||
((1, "healthy", "arbre"), bitmap123.clone()),
|
||||
((1, "healthy", "cat"), &bitmap456 | &bitmap123),
|
||||
((1, "healthy", "catto"), bitmap123.clone()),
|
||||
((1, "jittery", "cat"), (&bitmap123 | &bitmap456 | &bitmap789 | &bitmap_ranges)),
|
||||
((2, "healthy", "arb"), &bitmap123 | &bitmap456),
|
||||
((2, "healthy", "arbre"), &bitmap123 | &bitmap456),
|
||||
((2, "healthy", "cat"), &bitmap789 | &bitmap_ranges),
|
||||
((2, "healthy", "catto"), bitmap_ranges.clone()),
|
||||
];
|
||||
|
||||
let mut result = vec![];
|
||||
|
||||
let mut iter =
|
||||
IntoIterator::into_iter(word_pairs).map(|((word1, word2, proximity), data)| {
|
||||
((word1.as_bytes(), word2.as_bytes(), proximity), data.as_slice())
|
||||
IntoIterator::into_iter(word_pairs).map(|((proximity, word1, word2), data)| {
|
||||
((proximity, word1.as_bytes(), word2.as_bytes()), data.as_slice())
|
||||
});
|
||||
execute_on_word_pairs_and_prefixes(
|
||||
&mut iter,
|
||||
@ -899,7 +899,7 @@ mod tests {
|
||||
&prefixes,
|
||||
2,
|
||||
|k, v| {
|
||||
let (word1, prefix, proximity) = StrStrU8Codec::bytes_decode(k).unwrap();
|
||||
let (word1, prefix, proximity) = U8StrStrCodec::bytes_decode(k).unwrap();
|
||||
let bitmap = CboRoaringBitmapCodec::bytes_decode(v).unwrap();
|
||||
result.push(((word1.to_owned(), prefix.to_owned(), proximity.to_owned()), bitmap));
|
||||
Ok(())
|
||||
@ -908,8 +908,8 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
for (x, y) in result.into_iter().zip(IntoIterator::into_iter(expected_result)) {
|
||||
let ((actual_word1, actual_prefix, actual_proximity), actual_bitmap) = x;
|
||||
let ((expected_word1, expected_prefix, expected_proximity), expected_bitmap) = y;
|
||||
let ((actual_proximity, actual_word1, actual_prefix), actual_bitmap) = x;
|
||||
let ((expected_proximity, expected_word1, expected_prefix), expected_bitmap) = y;
|
||||
|
||||
assert_eq!(actual_word1, expected_word1);
|
||||
assert_eq!(actual_prefix, expected_prefix);
|
||||
|
Loading…
x
Reference in New Issue
Block a user