mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 05:14:27 +01:00
Improve the Mtbl heed codec to only encode MTBL databases
This commit is contained in:
parent
21aafd603c
commit
3fe497e129
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -1238,7 +1238,7 @@ checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "oxidized-mtbl"
|
name = "oxidized-mtbl"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
source = "git+https://github.com/Kerollmops/oxidized-mtbl.git?rev=13294cc#13294ccd73c9d6f71645a3ed2852656f3c86d31d"
|
source = "git+https://github.com/Kerollmops/oxidized-mtbl.git?rev=4ca66e5#4ca66e50115da760f602e878943af59f06c53af1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"crc32c",
|
"crc32c",
|
||||||
|
@ -22,7 +22,7 @@ jemallocator = "0.3.2"
|
|||||||
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
||||||
memmap = "0.7.0"
|
memmap = "0.7.0"
|
||||||
once_cell = "1.4.0"
|
once_cell = "1.4.0"
|
||||||
oxidized-mtbl = { git = "https://github.com/Kerollmops/oxidized-mtbl.git", rev = "13294cc" }
|
oxidized-mtbl = { git = "https://github.com/Kerollmops/oxidized-mtbl.git", rev = "4ca66e5" }
|
||||||
rayon = "1.3.1"
|
rayon = "1.3.1"
|
||||||
ringtail = "0.3.0"
|
ringtail = "0.3.0"
|
||||||
roaring = { git = "https://github.com/Kerollmops/roaring-rs.git", branch = "mem-usage" }
|
roaring = { git = "https://github.com/Kerollmops/roaring-rs.git", branch = "mem-usage" }
|
||||||
|
@ -579,13 +579,16 @@ fn main() -> anyhow::Result<()> {
|
|||||||
builder.extend(docs_stores);
|
builder.extend(docs_stores);
|
||||||
builder.build().write_into(&mut writer)?;
|
builder.build().write_into(&mut writer)?;
|
||||||
let file = writer.into_inner()?;
|
let file = writer.into_inner()?;
|
||||||
|
|
||||||
|
// Read back the documents MTBL database from the file.
|
||||||
let documents_mmap = unsafe { memmap::Mmap::map(&file)? };
|
let documents_mmap = unsafe { memmap::Mmap::map(&file)? };
|
||||||
|
let documents = Reader::new(documents_mmap)?;
|
||||||
|
|
||||||
debug!("We are writing the postings lists and documents into LMDB on disk...");
|
debug!("We are writing the postings lists and documents into LMDB on disk...");
|
||||||
// We merge the postings lists into LMDB.
|
// We merge the postings lists into LMDB.
|
||||||
let mut wtxn = env.write_txn()?;
|
let mut wtxn = env.write_txn()?;
|
||||||
merge_into_lmdb(stores, |k, v| lmdb_writer(&mut wtxn, &index, k, v))?;
|
merge_into_lmdb(stores, |k, v| lmdb_writer(&mut wtxn, &index, k, v))?;
|
||||||
index.put_documents(&mut wtxn, &documents_mmap)?;
|
index.put_documents(&mut wtxn, &documents)?;
|
||||||
let count = index.number_of_documents(&wtxn)?;
|
let count = index.number_of_documents(&wtxn)?;
|
||||||
wtxn.commit()?;
|
wtxn.commit()?;
|
||||||
|
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
use std::marker::PhantomData;
|
||||||
use oxidized_mtbl::Reader;
|
use oxidized_mtbl::Reader;
|
||||||
|
|
||||||
pub struct MtblCodec;
|
pub struct MtblCodec<A>(PhantomData<A>);
|
||||||
|
|
||||||
impl<'a> heed::BytesDecode<'a> for MtblCodec {
|
impl<'a> heed::BytesDecode<'a> for MtblCodec<&'a [u8]> {
|
||||||
type DItem = Reader<&'a [u8]>;
|
type DItem = Reader<&'a [u8]>;
|
||||||
|
|
||||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
@ -11,10 +12,10 @@ impl<'a> heed::BytesDecode<'a> for MtblCodec {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl heed::BytesEncode<'_> for MtblCodec {
|
impl<'a, A: AsRef<[u8]> + 'a> heed::BytesEncode<'a> for MtblCodec<A> {
|
||||||
type EItem = [u8];
|
type EItem = Reader<A>;
|
||||||
|
|
||||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||||
Some(Cow::Borrowed(item))
|
Some(Cow::Borrowed(item.as_bytes()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -12,6 +12,7 @@ use anyhow::{bail, Context};
|
|||||||
use fxhash::{FxHasher32, FxHasher64};
|
use fxhash::{FxHasher32, FxHasher64};
|
||||||
use heed::types::*;
|
use heed::types::*;
|
||||||
use heed::{PolyDatabase, Database};
|
use heed::{PolyDatabase, Database};
|
||||||
|
use oxidized_mtbl as omtbl;
|
||||||
|
|
||||||
pub use self::search::{Search, SearchResult};
|
pub use self::search::{Search, SearchResult};
|
||||||
pub use self::criterion::{Criterion, default_criteria};
|
pub use self::criterion::{Criterion, default_criteria};
|
||||||
@ -90,7 +91,7 @@ impl Index {
|
|||||||
iter: impl IntoIterator<Item=DocumentId>,
|
iter: impl IntoIterator<Item=DocumentId>,
|
||||||
) -> anyhow::Result<Vec<(DocumentId, Vec<u8>)>>
|
) -> anyhow::Result<Vec<(DocumentId, Vec<u8>)>>
|
||||||
{
|
{
|
||||||
match self.main.get::<_, Str, MtblCodec>(rtxn, DOCUMENTS_KEY)? {
|
match self.main.get::<_, Str, MtblCodec<&[u8]>>(rtxn, DOCUMENTS_KEY)? {
|
||||||
Some(documents) => {
|
Some(documents) => {
|
||||||
iter.into_iter().map(|id| {
|
iter.into_iter().map(|id| {
|
||||||
let key = id.to_be_bytes();
|
let key = id.to_be_bytes();
|
||||||
@ -103,13 +104,13 @@ impl Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn put_documents(&self, wtxn: &mut heed::RwTxn, documents: &[u8]) -> anyhow::Result<()> {
|
pub fn put_documents<A: AsRef<[u8]>>(&self, wtxn: &mut heed::RwTxn, documents: &omtbl::Reader<A>) -> anyhow::Result<()> {
|
||||||
Ok(self.main.put::<_, Str, MtblCodec>(wtxn, DOCUMENTS_KEY, documents)?)
|
Ok(self.main.put::<_, Str, MtblCodec<A>>(wtxn, DOCUMENTS_KEY, documents)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the number of documents indexed in the database.
|
/// Returns the number of documents indexed in the database.
|
||||||
pub fn number_of_documents<'t>(&self, rtxn: &'t heed::RoTxn) -> anyhow::Result<usize> {
|
pub fn number_of_documents<'t>(&self, rtxn: &'t heed::RoTxn) -> anyhow::Result<usize> {
|
||||||
match self.main.get::<_, Str, MtblCodec>(rtxn, DOCUMENTS_KEY)? {
|
match self.main.get::<_, Str, MtblCodec<&[u8]>>(rtxn, DOCUMENTS_KEY)? {
|
||||||
Some(documents) => Ok(documents.metadata().count_entries as usize),
|
Some(documents) => Ok(documents.metadata().count_entries as usize),
|
||||||
None => return Ok(0),
|
None => return Ok(0),
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user