mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Introduce the TreeLevel struct
This commit is contained in:
parent
bd1a371c62
commit
89ee2cf576
@ -5,7 +5,7 @@ use std::{str, io, fmt};
|
||||
use anyhow::Context;
|
||||
use byte_unit::Byte;
|
||||
use heed::EnvOpenOptions;
|
||||
use milli::Index;
|
||||
use milli::{Index, TreeLevel};
|
||||
use structopt::StructOpt;
|
||||
|
||||
use Command::*;
|
||||
@ -561,13 +561,12 @@ fn words_level_positions_docids(
|
||||
|
||||
for word in words.iter().map(AsRef::as_ref) {
|
||||
let range = {
|
||||
let left = (word, 0, u32::min_value(), u32::min_value());
|
||||
let right = (word, u8::max_value(), u32::max_value(), u32::max_value());
|
||||
let left = (word, TreeLevel::min_value(), u32::min_value(), u32::min_value());
|
||||
let right = (word, TreeLevel::max_value(), u32::max_value(), u32::max_value());
|
||||
left..=right
|
||||
};
|
||||
for result in index.word_level_position_docids.range(rtxn, &range)? {
|
||||
let ((w, level, left, right), docids) = result?;
|
||||
if word != w { break }
|
||||
|
||||
let count = docids.len().to_string();
|
||||
let docids = if debug {
|
||||
@ -575,7 +574,7 @@ fn words_level_positions_docids(
|
||||
} else {
|
||||
format!("{:?}", docids.iter().collect::<Vec<_>>())
|
||||
};
|
||||
let position_range = if level == 0 {
|
||||
let position_range = if level == TreeLevel::min_value() {
|
||||
format!("{:?}", left)
|
||||
} else {
|
||||
format!("{:?}", left..=right)
|
||||
|
@ -1,12 +1,14 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
use std::mem::size_of;
|
||||
use std::str;
|
||||
|
||||
use crate::TreeLevel;
|
||||
|
||||
pub struct StrLevelPositionCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for StrLevelPositionCodec {
|
||||
type DItem = (&'a str, u8, u32, u32);
|
||||
type DItem = (&'a str, TreeLevel, u32, u32);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let footer_len = size_of::<u8>() + size_of::<u32>() * 2;
|
||||
@ -19,13 +21,14 @@ impl<'a> heed::BytesDecode<'a> for StrLevelPositionCodec {
|
||||
let (level, bytes) = bytes.split_first()?;
|
||||
let left = bytes[..4].try_into().map(u32::from_be_bytes).ok()?;
|
||||
let right = bytes[4..].try_into().map(u32::from_be_bytes).ok()?;
|
||||
let level = TreeLevel::try_from(*level).ok()?;
|
||||
|
||||
Some((word, *level, left, right))
|
||||
Some((word, level, left, right))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for StrLevelPositionCodec {
|
||||
type EItem = (&'a str, u8, u32, u32);
|
||||
type EItem = (&'a str, TreeLevel, u32, u32);
|
||||
|
||||
fn bytes_encode((word, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let left = left.to_be_bytes();
|
||||
@ -33,7 +36,7 @@ impl<'a> heed::BytesEncode<'a> for StrLevelPositionCodec {
|
||||
|
||||
let mut bytes = Vec::with_capacity(word.len() + 1 + left.len() + right.len());
|
||||
bytes.extend_from_slice(word.as_bytes());
|
||||
bytes.push(*level);
|
||||
bytes.push((*level).into());
|
||||
bytes.extend_from_slice(&left[..]);
|
||||
bytes.extend_from_slice(&right[..]);
|
||||
|
||||
|
@ -9,6 +9,7 @@ pub mod facet;
|
||||
pub mod heed_codec;
|
||||
pub mod index;
|
||||
pub mod proximity;
|
||||
pub mod tree_level;
|
||||
pub mod update;
|
||||
|
||||
use std::borrow::Cow;
|
||||
@ -27,6 +28,7 @@ pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringB
|
||||
pub use self::heed_codec::{RoaringBitmapLenCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec};
|
||||
pub use self::index::Index;
|
||||
pub use self::search::{Search, FacetDistribution, FacetCondition, SearchResult, MatchingWords};
|
||||
pub use self::tree_level::TreeLevel;
|
||||
pub use self::update_store::UpdateStore;
|
||||
|
||||
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
||||
|
47
milli/src/tree_level.rs
Normal file
47
milli/src/tree_level.rs
Normal file
@ -0,0 +1,47 @@
|
||||
use std::convert::TryFrom;
|
||||
use std::fmt;
|
||||
|
||||
/// Highest raw value a [`TreeLevel`] is allowed to hold.
///
/// This is just before the lowest printable character (space, sp, 32).
const MAX_VALUE: u8 = 31;

/// Error returned when a raw `u8` cannot be converted into a [`TreeLevel`].
#[derive(Debug, Copy, Clone)]
pub enum Error {
    /// The given raw level is greater than the maximum allowed value (31).
    LevelTooHigh(u8),
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Error::LevelTooHigh(level) => {
                write!(f, "tree level {} is too high, the maximum is {}", level, MAX_VALUE)
            }
        }
    }
}

// Implementing the standard error trait lets callers propagate this error
// with `?` into `Box<dyn Error>`-like error types.
impl std::error::Error for Error {}

/// A tree level stored as a single byte that never exceeds `MAX_VALUE` (31).
///
/// The inner field is private: values can only be obtained through
/// [`TreeLevel::min_value`], [`TreeLevel::max_value`] or the checked
/// `TryFrom<u8>` conversion, which rejects anything above the maximum.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct TreeLevel(u8);

impl TreeLevel {
    /// Returns the highest valid level (31).
    pub const fn max_value() -> TreeLevel {
        TreeLevel(MAX_VALUE)
    }

    /// Returns the lowest valid level (0).
    pub const fn min_value() -> TreeLevel {
        TreeLevel(0)
    }
}

// `From` is implemented instead of `Into`: the standard blanket impl still
// provides `Into<u8> for TreeLevel`, so `level.into()` keeps working while
// `u8::from(level)` becomes available as well.
impl From<TreeLevel> for u8 {
    /// Returns the raw byte stored in the level.
    fn from(level: TreeLevel) -> u8 {
        level.0
    }
}

impl TryFrom<u8> for TreeLevel {
    type Error = Error;

    /// Converts a raw byte into a level.
    ///
    /// # Errors
    ///
    /// Returns [`Error::LevelTooHigh`] when `value` is greater than 31.
    fn try_from(value: u8) -> Result<TreeLevel, Error> {
        match value {
            0..=MAX_VALUE => Ok(TreeLevel(value)),
            _ => Err(Error::LevelTooHigh(value)),
        }
    }
}

impl fmt::Display for TreeLevel {
    /// Writes the level as its decimal number.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}
|
@ -1,4 +1,5 @@
|
||||
use std::cmp;
|
||||
use std::convert::TryFrom;
|
||||
use std::fs::File;
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
@ -9,9 +10,9 @@ use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::{StrLevelPositionCodec, CboRoaringBitmapCodec};
|
||||
use crate::Index;
|
||||
use crate::update::index_documents::WriteMethod;
|
||||
use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database};
|
||||
use crate::{Index, TreeLevel};
|
||||
|
||||
pub struct WordsLevelPositions<'t, 'u, 'i> {
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
@ -105,8 +106,8 @@ fn compute_positions_levels(
|
||||
let (word, ()) = result?;
|
||||
|
||||
let level_0_range = {
|
||||
let left = (word, 0, u32::min_value(), u32::min_value());
|
||||
let right = (word, 0, u32::max_value(), u32::max_value());
|
||||
let left = (word, TreeLevel::min_value(), u32::min_value(), u32::min_value());
|
||||
let right = (word, TreeLevel::max_value(), u32::max_value(), u32::max_value());
|
||||
left..=right
|
||||
};
|
||||
|
||||
@ -117,7 +118,7 @@ fn compute_positions_levels(
|
||||
// Groups sizes are always a power of the original level_group_size and therefore a group
|
||||
// always maps groups of the previous level and never splits previous levels groups in half.
|
||||
let group_size_iter = (1u8..)
|
||||
.map(|l| (l, level_group_size.get().pow(l as u32)))
|
||||
.map(|l| (TreeLevel::try_from(l).unwrap(), level_group_size.get().pow(l as u32)))
|
||||
.take_while(|(_, s)| first_level_size / *s >= min_level_size.get());
|
||||
|
||||
// As specified in the documentation, we also write the level 0 entries.
|
||||
@ -163,7 +164,7 @@ fn compute_positions_levels(
|
||||
fn write_level_entry(
|
||||
writer: &mut Writer<File>,
|
||||
word: &str,
|
||||
level: u8,
|
||||
level: TreeLevel,
|
||||
left: u32,
|
||||
right: u32,
|
||||
ids: &RoaringBitmap,
|
||||
|
Loading…
Reference in New Issue
Block a user