mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Introduce the TreeLevel struct
This commit is contained in:
parent
bd1a371c62
commit
89ee2cf576
@ -5,7 +5,7 @@ use std::{str, io, fmt};
|
|||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use byte_unit::Byte;
|
use byte_unit::Byte;
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use milli::Index;
|
use milli::{Index, TreeLevel};
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
|
|
||||||
use Command::*;
|
use Command::*;
|
||||||
@ -561,13 +561,12 @@ fn words_level_positions_docids(
|
|||||||
|
|
||||||
for word in words.iter().map(AsRef::as_ref) {
|
for word in words.iter().map(AsRef::as_ref) {
|
||||||
let range = {
|
let range = {
|
||||||
let left = (word, 0, u32::min_value(), u32::min_value());
|
let left = (word, TreeLevel::min_value(), u32::min_value(), u32::min_value());
|
||||||
let right = (word, u8::max_value(), u32::max_value(), u32::max_value());
|
let right = (word, TreeLevel::max_value(), u32::max_value(), u32::max_value());
|
||||||
left..=right
|
left..=right
|
||||||
};
|
};
|
||||||
for result in index.word_level_position_docids.range(rtxn, &range)? {
|
for result in index.word_level_position_docids.range(rtxn, &range)? {
|
||||||
let ((w, level, left, right), docids) = result?;
|
let ((w, level, left, right), docids) = result?;
|
||||||
if word != w { break }
|
|
||||||
|
|
||||||
let count = docids.len().to_string();
|
let count = docids.len().to_string();
|
||||||
let docids = if debug {
|
let docids = if debug {
|
||||||
@ -575,7 +574,7 @@ fn words_level_positions_docids(
|
|||||||
} else {
|
} else {
|
||||||
format!("{:?}", docids.iter().collect::<Vec<_>>())
|
format!("{:?}", docids.iter().collect::<Vec<_>>())
|
||||||
};
|
};
|
||||||
let position_range = if level == 0 {
|
let position_range = if level == TreeLevel::min_value() {
|
||||||
format!("{:?}", left)
|
format!("{:?}", left)
|
||||||
} else {
|
} else {
|
||||||
format!("{:?}", left..=right)
|
format!("{:?}", left..=right)
|
||||||
|
@ -1,12 +1,14 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::convert::TryInto;
|
use std::convert::{TryFrom, TryInto};
|
||||||
use std::mem::size_of;
|
use std::mem::size_of;
|
||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
|
use crate::TreeLevel;
|
||||||
|
|
||||||
pub struct StrLevelPositionCodec;
|
pub struct StrLevelPositionCodec;
|
||||||
|
|
||||||
impl<'a> heed::BytesDecode<'a> for StrLevelPositionCodec {
|
impl<'a> heed::BytesDecode<'a> for StrLevelPositionCodec {
|
||||||
type DItem = (&'a str, u8, u32, u32);
|
type DItem = (&'a str, TreeLevel, u32, u32);
|
||||||
|
|
||||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
let footer_len = size_of::<u8>() + size_of::<u32>() * 2;
|
let footer_len = size_of::<u8>() + size_of::<u32>() * 2;
|
||||||
@ -19,13 +21,14 @@ impl<'a> heed::BytesDecode<'a> for StrLevelPositionCodec {
|
|||||||
let (level, bytes) = bytes.split_first()?;
|
let (level, bytes) = bytes.split_first()?;
|
||||||
let left = bytes[..4].try_into().map(u32::from_be_bytes).ok()?;
|
let left = bytes[..4].try_into().map(u32::from_be_bytes).ok()?;
|
||||||
let right = bytes[4..].try_into().map(u32::from_be_bytes).ok()?;
|
let right = bytes[4..].try_into().map(u32::from_be_bytes).ok()?;
|
||||||
|
let level = TreeLevel::try_from(*level).ok()?;
|
||||||
|
|
||||||
Some((word, *level, left, right))
|
Some((word, level, left, right))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> heed::BytesEncode<'a> for StrLevelPositionCodec {
|
impl<'a> heed::BytesEncode<'a> for StrLevelPositionCodec {
|
||||||
type EItem = (&'a str, u8, u32, u32);
|
type EItem = (&'a str, TreeLevel, u32, u32);
|
||||||
|
|
||||||
fn bytes_encode((word, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
|
fn bytes_encode((word, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||||
let left = left.to_be_bytes();
|
let left = left.to_be_bytes();
|
||||||
@ -33,7 +36,7 @@ impl<'a> heed::BytesEncode<'a> for StrLevelPositionCodec {
|
|||||||
|
|
||||||
let mut bytes = Vec::with_capacity(word.len() + 1 + left.len() + right.len());
|
let mut bytes = Vec::with_capacity(word.len() + 1 + left.len() + right.len());
|
||||||
bytes.extend_from_slice(word.as_bytes());
|
bytes.extend_from_slice(word.as_bytes());
|
||||||
bytes.push(*level);
|
bytes.push((*level).into());
|
||||||
bytes.extend_from_slice(&left[..]);
|
bytes.extend_from_slice(&left[..]);
|
||||||
bytes.extend_from_slice(&right[..]);
|
bytes.extend_from_slice(&right[..]);
|
||||||
|
|
||||||
|
@ -9,6 +9,7 @@ pub mod facet;
|
|||||||
pub mod heed_codec;
|
pub mod heed_codec;
|
||||||
pub mod index;
|
pub mod index;
|
||||||
pub mod proximity;
|
pub mod proximity;
|
||||||
|
pub mod tree_level;
|
||||||
pub mod update;
|
pub mod update;
|
||||||
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
@ -27,6 +28,7 @@ pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringB
|
|||||||
pub use self::heed_codec::{RoaringBitmapLenCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec};
|
pub use self::heed_codec::{RoaringBitmapLenCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec};
|
||||||
pub use self::index::Index;
|
pub use self::index::Index;
|
||||||
pub use self::search::{Search, FacetDistribution, FacetCondition, SearchResult, MatchingWords};
|
pub use self::search::{Search, FacetDistribution, FacetCondition, SearchResult, MatchingWords};
|
||||||
|
pub use self::tree_level::TreeLevel;
|
||||||
pub use self::update_store::UpdateStore;
|
pub use self::update_store::UpdateStore;
|
||||||
|
|
||||||
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
||||||
|
47
milli/src/tree_level.rs
Normal file
47
milli/src/tree_level.rs
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
use std::convert::TryFrom;
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
|
/// Highest raw value a [`TreeLevel`] is allowed to hold.
///
/// This is just before the lowest printable character (space, sp, 32).
const MAX_VALUE: u8 = 31;

/// Error produced when a raw `u8` cannot be turned into a [`TreeLevel`].
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Error {
    /// The raw level (carried in the variant) is greater than the maximum
    /// level a [`TreeLevel`] can represent.
    LevelTooHigh(u8),
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Error::LevelTooHigh(level) => {
                write!(f, "tree level {} is too high, the maximum is {}", level, MAX_VALUE)
            }
        }
    }
}

// Implementing the standard trait lets callers propagate this error with `?`
// into `Box<dyn Error>`-like containers.
impl std::error::Error for Error {}

/// A level of a tree, stored as a single byte that is guaranteed to be in
/// the `0..=31` range.
///
/// Values can only be built through [`TreeLevel::min_value`],
/// [`TreeLevel::max_value`] or the checked `TryFrom<u8>` conversion, so the
/// range invariant always holds.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct TreeLevel(u8);

impl TreeLevel {
    /// Returns the highest representable level (31).
    pub const fn max_value() -> TreeLevel {
        TreeLevel(MAX_VALUE)
    }

    /// Returns the lowest representable level (0).
    pub const fn min_value() -> TreeLevel {
        TreeLevel(0)
    }
}

// `From` is implemented rather than `Into`: the blanket impl in the standard
// library still provides `Into<u8> for TreeLevel`, so existing `.into()`
// call sites keep working, and `u8::from(level)` becomes available too.
impl From<TreeLevel> for u8 {
    /// Returns the raw byte backing the level.
    fn from(level: TreeLevel) -> u8 {
        level.0
    }
}

impl TryFrom<u8> for TreeLevel {
    type Error = Error;

    /// Converts a raw byte into a level.
    ///
    /// # Errors
    ///
    /// Returns [`Error::LevelTooHigh`] when `value` is greater than 31.
    fn try_from(value: u8) -> Result<TreeLevel, Error> {
        match value {
            0..=MAX_VALUE => Ok(TreeLevel(value)),
            _ => Err(Error::LevelTooHigh(value)),
        }
    }
}

impl fmt::Display for TreeLevel {
    /// Writes the level as its plain decimal number.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}
|
@ -1,4 +1,5 @@
|
|||||||
use std::cmp;
|
use std::cmp;
|
||||||
|
use std::convert::TryFrom;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::num::NonZeroUsize;
|
use std::num::NonZeroUsize;
|
||||||
|
|
||||||
@ -9,9 +10,9 @@ use log::debug;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::{StrLevelPositionCodec, CboRoaringBitmapCodec};
|
use crate::heed_codec::{StrLevelPositionCodec, CboRoaringBitmapCodec};
|
||||||
use crate::Index;
|
|
||||||
use crate::update::index_documents::WriteMethod;
|
use crate::update::index_documents::WriteMethod;
|
||||||
use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database};
|
use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database};
|
||||||
|
use crate::{Index, TreeLevel};
|
||||||
|
|
||||||
pub struct WordsLevelPositions<'t, 'u, 'i> {
|
pub struct WordsLevelPositions<'t, 'u, 'i> {
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
@ -105,8 +106,8 @@ fn compute_positions_levels(
|
|||||||
let (word, ()) = result?;
|
let (word, ()) = result?;
|
||||||
|
|
||||||
let level_0_range = {
|
let level_0_range = {
|
||||||
let left = (word, 0, u32::min_value(), u32::min_value());
|
let left = (word, TreeLevel::min_value(), u32::min_value(), u32::min_value());
|
||||||
let right = (word, 0, u32::max_value(), u32::max_value());
|
let right = (word, TreeLevel::max_value(), u32::max_value(), u32::max_value());
|
||||||
left..=right
|
left..=right
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -117,7 +118,7 @@ fn compute_positions_levels(
|
|||||||
// Groups sizes are always a power of the original level_group_size and therefore a group
|
// Groups sizes are always a power of the original level_group_size and therefore a group
|
||||||
// always maps groups of the previous level and never splits previous levels groups in half.
|
// always maps groups of the previous level and never splits previous levels groups in half.
|
||||||
let group_size_iter = (1u8..)
|
let group_size_iter = (1u8..)
|
||||||
.map(|l| (l, level_group_size.get().pow(l as u32)))
|
.map(|l| (TreeLevel::try_from(l).unwrap(), level_group_size.get().pow(l as u32)))
|
||||||
.take_while(|(_, s)| first_level_size / *s >= min_level_size.get());
|
.take_while(|(_, s)| first_level_size / *s >= min_level_size.get());
|
||||||
|
|
||||||
// As specified in the documentation, we also write the level 0 entries.
|
// As specified in the documentation, we also write the level 0 entries.
|
||||||
@ -163,7 +164,7 @@ fn compute_positions_levels(
|
|||||||
fn write_level_entry(
|
fn write_level_entry(
|
||||||
writer: &mut Writer<File>,
|
writer: &mut Writer<File>,
|
||||||
word: &str,
|
word: &str,
|
||||||
level: u8,
|
level: TreeLevel,
|
||||||
left: u32,
|
left: u32,
|
||||||
right: u32,
|
right: u32,
|
||||||
ids: &RoaringBitmap,
|
ids: &RoaringBitmap,
|
||||||
|
Loading…
Reference in New Issue
Block a user