From b636e5fe5758eabeac54851254303bfba1bbd6aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Mon, 26 Nov 2018 17:30:19 +0100
Subject: [PATCH] feat: Introduce the "data-index" entry with merge compaction

---
 Cargo.toml                          |   3 +-
 src/blob/merge.rs                   |  10 +-
 src/blob/mod.rs                     |  98 ---------------
 src/blob/negative_blob.rs           |   8 +-
 src/blob/positive_blob.rs           |  17 ++-
 src/data/doc_ids.rs                 |   4 +
 src/data/doc_indexes.rs             | 105 +++++++++-----
 src/data/mod.rs                     |  12 +-
 src/index/identifier.rs             |  40 +-----
 src/index/mod.rs                    | 182 ++++++++++++++--------
 src/index/update/mod.rs             |   2 -
 src/index/update/positive_update.rs |   1 -
 12 files changed, 251 insertions(+), 231 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index eff4dac16..c95d91bee 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,10 +8,9 @@ authors = ["Kerollmops "]
 bincode = "1.0"
 byteorder = "1.2"
 fnv = "1.0"
-fs2 = "0.4"
 lazy_static = "1.1"
 linked-hash-map = { version = "0.5", features = ["serde_impl"] }
-sdset = "0.2"
+sdset = "0.3"
 serde = "1.0"
 serde_derive = "1.0"
 unidecode = "0.3"

diff --git a/src/blob/merge.rs b/src/blob/merge.rs
index e21398587..c16e62b27 100644
--- a/src/blob/merge.rs
+++ b/src/blob/merge.rs
@@ -1,17 +1,17 @@
-use crate::vec_read_only::VecReadOnly;
 use std::collections::BinaryHeap;
-use std::{mem, cmp};
 use std::rc::Rc;
+use std::cmp;
 
 use fst::{Automaton, Streamer};
 use fst::automaton::AlwaysMatch;
-use sdset::{Set, SetBuf, SetOperation};
+use sdset::{Set, SetOperation};
 use sdset::duo::OpBuilder as SdOpBuilder;
 use group_by::GroupBy;
 
-use crate::blob::{Blob, Sign};
-use crate::blob::ops::{OpBuilder, Union, IndexedDocIndexes};
 use crate::DocIndex;
+use crate::blob::{Blob, Sign};
+use crate::vec_read_only::VecReadOnly;
+use crate::blob::ops::{OpBuilder, Union, IndexedDocIndexes};
 
 fn group_is_negative(blobs: &&[Blob]) -> bool {
     blobs[0].sign() == Sign::Negative

diff --git a/src/blob/mod.rs b/src/blob/mod.rs
index 7b404f3ce..37646f789 100644
--- a/src/blob/mod.rs
+++ b/src/blob/mod.rs
@@ -12,7 +12,6 @@ use std::error::Error;
 use std::io::{Write, Read};
 use std::{io, fmt, mem};
 
-use fst::Map;
 use uuid::Uuid;
 use rocksdb::rocksdb::{DB, Snapshot};
 use serde::ser::{Serialize, Serializer, SerializeTuple};
@@ -108,100 +107,3 @@ impl Sign {
         }
     }
 }
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
-pub struct BlobName(Uuid);
-
-impl BlobName {
-    pub fn new() -> BlobName {
-        BlobName(Uuid::new_v4())
-    }
-
-    pub fn as_bytes(&self) -> &[u8; 16] {
-        self.0.as_bytes()
-    }
-}
-
-impl fmt::Display for BlobName {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        f.debug_tuple("BlobName")
-            .field(&self.0.to_hyphenated().to_string())
-            .finish()
-    }
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-pub struct BlobInfo {
-    pub sign: Sign,
-    pub name: BlobName,
-}
-
-impl BlobInfo {
-    pub fn new_positive() -> BlobInfo {
-        BlobInfo {
-            sign: Sign::Positive,
-            name: BlobName::new(),
-        }
-    }
-
-    pub fn new_negative() -> BlobInfo {
-        BlobInfo {
-            sign: Sign::Negative,
-            name: BlobName::new(),
-        }
-    }
-
-    pub fn read_from<R: Read>(reader: R) -> bincode::Result<BlobInfo> {
-        bincode::deserialize_from(reader)
-    }
-
-    pub fn read_from_slice(slice: &[u8]) -> bincode::Result<Vec<BlobInfo>> {
-        let len = slice.len() / mem::size_of::<BlobInfo>();
-        let mut blob_infos = Vec::with_capacity(len);
-
-        let mut cursor = io::Cursor::new(slice);
-        while blob_infos.len() != len {
-            let blob_info = BlobInfo::read_from(&mut cursor)?;
-            blob_infos.push(blob_info);
-        }
-
-        Ok(blob_infos)
-    }
-
-    pub fn write_into<W: Write>(&self, writer: W) -> bincode::Result<()> {
-        bincode::serialize_into(writer, self)
-    }
-}
-
-pub fn blobs_from_blob_infos(infos: &[BlobInfo], snapshot: &Snapshot<&DB>) -> Result<Vec<Blob>, Box<Error>> {
-    let mut blobs = Vec::with_capacity(infos.len());
-
-    for info in infos {
-        let blob = match info.sign {
-            Sign::Positive => {
-                let blob_key = Identifier::blob(info.name).fst_map().build();
-                let map = match snapshot.get(&blob_key)? {
-                    Some(value) => value.to_vec(),
-                    None => return Err(format!("No fst entry found for blob {}", info.name).into()),
-                };
-                let blob_key = Identifier::blob(info.name).document_indexes().build();
-                let doc_idx = match snapshot.get(&blob_key)? {
-                    Some(value) => value.to_vec(),
-                    None => return Err(format!("No doc-idx entry found for blob {}", info.name).into()),
-                };
-                PositiveBlob::from_bytes(map, doc_idx).map(Blob::Positive)?
-            },
-            Sign::Negative => {
-                let blob_key = Identifier::blob(info.name).document_ids().build();
-                let doc_ids = match snapshot.get(&blob_key)? {
-                    Some(value) => value.to_vec(),
-                    None => return Err(format!("No doc-ids entry found for blob {}", info.name).into()),
-                };
-                NegativeBlob::from_bytes(doc_ids).map(Blob::Negative)?
-            },
-        };
-        blobs.push(blob);
-    }
-
-    Ok(blobs)
-}

diff --git a/src/blob/negative_blob.rs b/src/blob/negative_blob.rs
index 19ff5d026..e81ad2616 100644
--- a/src/blob/negative_blob.rs
+++ b/src/blob/negative_blob.rs
@@ -1,6 +1,6 @@
-use std::io::{Read, Write};
-use std::error::Error;
+use std::io::Write;
 use std::path::Path;
+use std::error::Error;
 
 use crate::DocumentId;
 use crate::data::{DocIds, DocIdsBuilder};
@@ -24,6 +24,10 @@ impl NegativeBlob {
         Ok(NegativeBlob { doc_ids })
     }
 
+    pub fn from_raw(doc_ids: DocIds) -> Self {
+        NegativeBlob { doc_ids }
+    }
+
     pub fn as_ids(&self) -> &DocIds {
         &self.doc_ids
     }

diff --git a/src/blob/positive_blob.rs b/src/blob/positive_blob.rs
index 29966cd68..baf71df5f 100644
--- a/src/blob/positive_blob.rs
+++ b/src/blob/positive_blob.rs
@@ -1,7 +1,7 @@
-use std::io::{Read, Write};
-use std::error::Error;
-use std::path::Path;
 use std::fmt;
+use std::io::Write;
+use std::path::Path;
+use std::error::Error;
 
 use fst::{Map, MapBuilder};
 
@@ -10,6 +10,7 @@ use crate::data::{DocIndexes, DocIndexesBuilder};
 use serde::ser::{Serialize, Serializer, SerializeTuple};
 use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor};
 
+#[derive(Default)]
 pub struct PositiveBlob {
     map: Map,
     indexes: DocIndexes,
@@ -31,6 +32,10 @@ impl PositiveBlob {
         Ok(PositiveBlob { map, indexes })
     }
 
+    pub fn from_raw(map: Map, indexes: DocIndexes) -> Self {
+        PositiveBlob { map, indexes }
+    }
+
     pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Option<&[DocIndex]> {
         self.map.get(key).and_then(|index| self.indexes.get(index))
     }
@@ -109,7 +114,7 @@ impl<W: Write, X: Write> PositiveBlobBuilder<W, X> {
     }
 
     pub fn finish(self) -> Result<(), Box<Error>> {
-        self.into_inner().map(|_| ())
+        self.into_inner().map(drop)
     }
 
     pub fn into_inner(self) -> Result<(W, X), Box<Error>> {
@@ -130,6 +135,10 @@ impl<W: Write, X: Write> PositiveBlobBuilder<W, X> {
 }
 
 impl PositiveBlobBuilder<Vec<u8>, Vec<u8>> {
+    pub fn memory() -> Self {
+        PositiveBlobBuilder::new(Vec::new(), Vec::new())
+    }
+
     pub fn build(self) -> Result<PositiveBlob, Box<Error>> {
         self.into_inner().and_then(|(m, i)| PositiveBlob::from_bytes(m, i))
     }

diff --git a/src/data/doc_ids.rs b/src/data/doc_ids.rs
index 168ca0b46..afc476e92 100644
--- a/src/data/doc_ids.rs
+++ b/src/data/doc_ids.rs
@@ -35,6 +35,10 @@ impl DocIds {
         Ok(DocIds { data })
     }
 
+    pub fn from_document_ids(vec: Vec<DocumentId>) -> Self {
+        DocIds::from_bytes(unsafe { mem::transmute(vec) }).unwrap()
+    }
+
     pub fn contains(&self, doc: DocumentId) -> bool {
         // FIXME prefer using the sdset::exponential_search function
         self.doc_ids().binary_search(&doc).is_ok()

diff --git a/src/data/doc_indexes.rs b/src/data/doc_indexes.rs
index 97222b205..82eb3a86b 100644
--- a/src/data/doc_indexes.rs
+++ b/src/data/doc_indexes.rs
@@ -19,7 +19,7 @@ struct Range {
     end: u64,
 }
 
-#[derive(Clone)]
+#[derive(Clone, Default)]
 pub struct DocIndexes {
     ranges: Data,
     indexes: Data,
 }
 
@@ -29,15 +29,14 @@ impl DocIndexes {
     pub unsafe fn from_path<P: AsRef<Path>>(path: P) -> io::Result<Self> {
         let mmap = MmapReadOnly::open_path(path)?;
 
-        let range_len = mmap.as_slice().read_u64::<LittleEndian>()?;
-        let range_len = range_len as usize * mem::size_of::<Range>();
+        let ranges_len_offset = mmap.as_slice().len() - mem::size_of::<u64>();
+        let ranges_len = (&mmap.as_slice()[ranges_len_offset..]).read_u64::<LittleEndian>()?;
+        let ranges_len = ranges_len as usize * mem::size_of::<Range>();
 
-        let offset = mem::size_of::<u64>() as usize;
-        let ranges = Data::Mmap(mmap.range(offset, range_len));
+        let ranges_offset = ranges_len_offset - ranges_len;
+        let ranges = Data::Mmap(mmap.range(ranges_offset, ranges_len));
 
-        let len = mmap.len() - range_len - offset;
-        let offset = offset + range_len;
-        let indexes = Data::Mmap(mmap.range(offset, len));
+        let indexes = Data::Mmap(mmap.range(0, ranges_offset));
 
         Ok(DocIndexes { ranges, indexes })
     }
@@ -45,19 +44,22 @@ impl DocIndexes {
     pub fn from_bytes(vec: Vec<u8>) -> io::Result<Self> {
         let vec = Arc::new(vec);
 
-        let range_len = vec.as_slice().read_u64::<LittleEndian>()?;
-        let range_len = range_len as usize * mem::size_of::<Range>();
+        let ranges_len_offset = vec.len() - mem::size_of::<u64>();
+        let ranges_len = (&vec[ranges_len_offset..]).read_u64::<LittleEndian>()?;
+        let ranges_len = ranges_len as usize * mem::size_of::<Range>();
 
-        let offset = mem::size_of::<u64>() as usize;
+        let ranges_offset = ranges_len_offset - ranges_len;
         let ranges = Data::Shared {
             vec: vec.clone(),
-            offset,
-            len: range_len
+            offset: ranges_offset,
+            len: ranges_len,
         };
 
-        let len = vec.len() - range_len - offset;
-        let offset = offset + range_len;
-        let indexes = Data::Shared { vec, offset, len };
+        let indexes = Data::Shared {
+            vec: vec,
+            offset: 0,
+            len: ranges_offset,
+        };
 
         Ok(DocIndexes { ranges, indexes })
     }
@@ -94,6 +96,53 @@ impl Serialize for DocIndexes {
     }
 }
 
+pub struct RawDocIndexesBuilder<W> {
+    ranges: Vec<Range>,
+    wtr: W,
+}
+
+impl RawDocIndexesBuilder<Vec<u8>> {
+    pub fn memory() -> Self {
+        RawDocIndexesBuilder::new(Vec::new())
+    }
+}
+
+impl<W: Write> RawDocIndexesBuilder<W> {
+    pub fn new(wtr: W) -> Self {
+        RawDocIndexesBuilder {
+            ranges: Vec::new(),
+            wtr: wtr,
+        }
+    }
+
+    pub fn insert(&mut self, indexes: &[DocIndex]) -> io::Result<()> {
+        let len = indexes.len() as u64;
+        let start = self.ranges.last().map(|r| r.end).unwrap_or(0);
+        let range = Range { start, end: start + len };
+        self.ranges.push(range);
+
+        // write the values
+        let indexes = unsafe { into_u8_slice(indexes) };
+        self.wtr.write_all(indexes)
+    }
+
+    pub fn finish(self) -> io::Result<()> {
+        self.into_inner().map(drop)
+    }
+
+    pub fn into_inner(mut self) -> io::Result<W> {
+        // write the ranges
+        let ranges = unsafe { into_u8_slice(self.ranges.as_slice()) };
+        self.wtr.write_all(ranges)?;
+
+        // write the number of ranges
+        let len = self.ranges.len() as u64;
+        self.wtr.write_u64::<LittleEndian>(len)?;
+
+        Ok(self.wtr)
+    }
+}
+
 pub struct DocIndexesBuilder<W> {
     keys: BTreeMap<String, u64>,
     indexes: Vec<Vec<DocIndex>>,
@@ -136,29 +185,27 @@ impl<W: Write> DocIndexesBuilder<W> {
     }
 
     pub fn finish(self) -> io::Result<()> {
-        self.into_inner().map(|_| ())
+        self.into_inner().map(drop)
     }
 
     pub fn into_inner(mut self) -> io::Result<W> {
-
         for vec in &mut self.indexes {
             vec.sort_unstable();
         }
 
         let (ranges, values) = into_sliced_ranges(self.indexes, self.number_docs);
+
+        // write values first
+        let slice = unsafe { into_u8_slice(values.as_slice()) };
+        self.wtr.write_all(slice)?;
+
+        // write ranges after
+        let slice = unsafe { into_u8_slice(ranges.as_slice()) };
+        self.wtr.write_all(slice)?;
+
+        // write the length of the ranges
         let len = ranges.len() as u64;
-
-        // TODO check if this is correct
         self.wtr.write_u64::<LittleEndian>(len)?;
-        unsafe {
-            // write Ranges first
-            let slice = into_u8_slice(ranges.as_slice());
-            self.wtr.write_all(slice)?;
-
-            // write Values after
-            let slice = into_u8_slice(values.as_slice());
-            self.wtr.write_all(slice)?;
-        }
 
         self.wtr.flush()?;
 
         Ok(self.wtr)

diff --git a/src/data/mod.rs b/src/data/mod.rs
index ca9816fa3..8c0329bf4 100644
--- a/src/data/mod.rs
+++ b/src/data/mod.rs
@@ -7,7 +7,7 @@ use std::sync::Arc;
 use fst::raw::MmapReadOnly;
 
 pub use self::doc_ids::{DocIds, DocIdsBuilder};
-pub use self::doc_indexes::{DocIndexes, DocIndexesBuilder};
+pub use self::doc_indexes::{DocIndexes, DocIndexesBuilder, RawDocIndexesBuilder};
 
 #[derive(Clone)]
 enum Data {
@@ -19,6 +19,16 @@ enum Data {
     Mmap(MmapReadOnly),
 }
 
+impl Default for Data {
+    fn default() -> Data {
+        Data::Shared {
+            vec: Arc::default(),
+            offset: 0,
+            len: 0,
+        }
+    }
+}
+
 impl Deref for Data {
     type Target = [u8];

diff --git a/src/index/identifier.rs b/src/index/identifier.rs
index 0a2f3f3d4..ee36ab314 100644
--- a/src/index/identifier.rs
+++ b/src/index/identifier.rs
@@ -3,7 +3,6 @@ use std::io::Write;
 use byteorder::{NetworkEndian, WriteBytesExt};
 
 use crate::index::schema::SchemaAttr;
-use crate::blob::BlobName;
 use crate::DocumentId;
 
 pub struct Identifier {
@@ -17,13 +16,6 @@ impl Identifier {
         Data { inner }
     }
 
-    pub fn blob(name: BlobName) -> Blob {
-        let mut inner = Vec::new();
-        let _ = inner.write(b"blob");
-        let _ = inner.write(name.as_bytes());
-        Blob { inner }
-    }
-
     pub fn document(id: DocumentId) -> Document {
         let mut inner = Vec::new();
         let _ = inner.write(b"docu");
@@ -38,9 +30,9 @@ pub struct Data {
 }
 
 impl Data {
-    pub fn blobs_order(mut self) -> Self {
+    pub fn index(mut self) -> Self {
         let _ = self.inner.write(b"-");
-        let _ = self.inner.write(b"blobs-order");
+        let _ = self.inner.write(b"index");
         self
     }
 
@@ -55,34 +47,6 @@ impl Data {
     }
 }
 
-pub struct Blob {
-    inner: Vec<u8>,
-}
-
-impl Blob {
-    pub fn document_indexes(mut self) -> Self {
-        let _ = self.inner.write(b"-");
-        let _ = self.inner.write(b"doc-idx");
-        self
-    }
-
-    pub fn document_ids(mut self) -> Self {
-        let _ = self.inner.write(b"-");
-        let _ = self.inner.write(b"doc-ids");
-        self
-    }
-
-    pub fn fst_map(mut self) -> Self {
-        let _ = self.inner.write(b"-");
-        let _ = self.inner.write(b"fst");
-        self
-    }
-
-    pub fn build(self) -> Vec<u8> {
-        self.inner
-    }
-}
-
 pub struct Document {
     inner: Vec<u8>,
 }

diff --git a/src/index/mod.rs b/src/index/mod.rs
index 9de25f476..3e005e85e 100644
--- a/src/index/mod.rs
+++ b/src/index/mod.rs
@@ -2,76 +2,163 @@ pub mod identifier;
 pub mod schema;
 pub mod update;
 
-use std::io;
-use std::rc::Rc;
 use std::error::Error;
-use std::fs::{self, File};
-use std::fmt::{self, Write};
-use std::ops::{Deref, BitOr};
-use std::path::{Path, PathBuf};
-use std::collections::{BTreeSet, BTreeMap};
+use std::path::Path;
 
-use fs2::FileExt;
+use fst::map::{Map, MapBuilder, OpBuilder};
+use fst::{IntoStreamer, Streamer};
+use sdset::duo::Union as SdUnion;
+use sdset::duo::DifferenceByKey;
+use sdset::{Set, SetOperation};
 use ::rocksdb::rocksdb::Writable;
 use ::rocksdb::{rocksdb, rocksdb_options};
 use ::rocksdb::merge_operator::MergeOperands;
 
+use crate::DocIndex;
+use crate::automaton;
 use crate::rank::Document;
-use crate::data::DocIdsBuilder;
-use crate::{DocIndex, DocumentId};
 use crate::index::schema::Schema;
 use crate::index::update::Update;
+use crate::tokenizer::TokenizerBuilder;
 use crate::index::identifier::Identifier;
-use crate::blob::{PositiveBlobBuilder, PositiveBlob, BlobInfo, Sign, Blob, blobs_from_blob_infos};
-use crate::tokenizer::{TokenizerBuilder, DefaultBuilder, Tokenizer};
 use crate::rank::{criterion, Config, RankedStream};
-use crate::automaton;
+use crate::data::{DocIds, DocIndexes, RawDocIndexesBuilder};
+use crate::blob::{PositiveBlob, NegativeBlob, Blob};
 
-fn simple_vec_append(key: &[u8], value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
-    let mut output = Vec::new();
-    for bytes in operands.chain(value) {
-        output.extend_from_slice(bytes);
+fn union_positives(a: &PositiveBlob, b: &PositiveBlob) -> Result<PositiveBlob, Box<Error>> {
+    let (a_map, a_indexes) = (a.as_map(), a.as_indexes());
+    let (b_map, b_indexes) = (b.as_map(), b.as_indexes());
+
+    let mut map_builder = MapBuilder::memory();
+    let mut indexes_builder = RawDocIndexesBuilder::memory();
+
+    let op_builder = OpBuilder::new().add(a_map).add(b_map);
+    let mut stream = op_builder.union();
+    let mut i = 0;
+
+    while let Some((key, indexed)) = stream.next() {
+        let doc_idx: Vec<DocIndex> = match indexed {
+            [a, b] => {
+                let a_doc_idx = a_indexes.get(a.value).expect("BUG: could not find document indexes");
+                let b_doc_idx = b_indexes.get(b.value).expect("BUG: could not find document indexes");
+
+                let a_doc_idx = Set::new_unchecked(a_doc_idx);
+                let b_doc_idx = Set::new_unchecked(b_doc_idx);
+
+                let sd_union = SdUnion::new(a_doc_idx, b_doc_idx);
+                sd_union.into_set_buf().into_vec()
+            },
+            [a] => {
+                let indexes = if a.index == 0 { a_indexes } else { b_indexes };
+                let doc_idx = indexes.get(a.value).expect("BUG: could not find document indexes");
+                doc_idx.to_vec()
+            },
+            _ => unreachable!(),
+        };
+
+        if !doc_idx.is_empty() {
+            map_builder.insert(key, i)?;
+            indexes_builder.insert(&doc_idx)?;
+            i += 1;
+        }
     }
-    output
+
+    let inner = map_builder.into_inner()?;
+    let map = Map::from_bytes(inner)?;
+
+    let inner = indexes_builder.into_inner()?;
+    let indexes = DocIndexes::from_bytes(inner)?;
+
+    Ok(PositiveBlob::from_raw(map, indexes))
 }
 
-pub struct MergeBuilder {
-    blobs: Vec<Blob>,
+fn union_negatives(a: &NegativeBlob, b: &NegativeBlob) -> NegativeBlob {
+    let a_doc_ids = a.as_ids().doc_ids();
+    let b_doc_ids = b.as_ids().doc_ids();
+
+    let a_doc_ids = Set::new_unchecked(a_doc_ids);
+    let b_doc_ids = Set::new_unchecked(b_doc_ids);
+
+    let sd_union = SdUnion::new(a_doc_ids, b_doc_ids);
+    let doc_ids = sd_union.into_set_buf().into_vec();
+    let doc_ids = DocIds::from_document_ids(doc_ids);
+
+    NegativeBlob::from_raw(doc_ids)
 }
 
-impl MergeBuilder {
-    pub fn new() -> MergeBuilder {
-        MergeBuilder { blobs: Vec::new() }
+fn merge_positive_negative(pos: &PositiveBlob, neg: &NegativeBlob) -> Result<PositiveBlob, Box<Error>> {
+    let (map, indexes) = (pos.as_map(), pos.as_indexes());
+    let doc_ids = neg.as_ids().doc_ids();
+
+    let doc_ids = Set::new_unchecked(doc_ids);
+
+    let mut map_builder = MapBuilder::memory();
+    let mut indexes_builder = RawDocIndexesBuilder::memory();
+
+    let mut stream = map.into_stream();
+    let mut i = 0;
+
+    while let Some((key, index)) = stream.next() {
+        let doc_idx = indexes.get(index).expect("BUG: could not find document indexes");
+        let doc_idx = Set::new_unchecked(doc_idx);
+
+        let diff = DifferenceByKey::new(doc_idx, doc_ids, |&d| d.document_id, |id| *id);
+        let doc_idx: Vec<DocIndex> = diff.into_set_buf().into_vec();
+
+        map_builder.insert(key, i)?;
+        indexes_builder.insert(&doc_idx)?;
+        i += 1;
     }
 
-    pub fn push(&mut self, blob: Blob) {
-        if blob.sign() == Sign::Negative && self.blobs.is_empty() { return }
-        self.blobs.push(blob);
+    let inner = map_builder.into_inner()?;
+    let map = Map::from_bytes(inner)?;
+
+    let inner = indexes_builder.into_inner()?;
+    let indexes = DocIndexes::from_bytes(inner)?;
+
+    Ok(PositiveBlob::from_raw(map, indexes))
+}
+
+#[derive(Default)]
+struct Merge {
+    blob: PositiveBlob,
+}
+
+impl Merge {
+    fn new(blob: PositiveBlob) -> Merge {
+        Merge { blob }
     }
 
-    pub fn merge(self) -> PositiveBlob {
-        unimplemented!()
+    fn merge(&mut self, blob: Blob) {
+        self.blob = match blob {
+            Blob::Positive(blob) => union_positives(&self.blob, &blob).unwrap(),
+            Blob::Negative(blob) => merge_positive_negative(&self.blob, &blob).unwrap(),
+        };
+    }
+
+    fn build(self) -> PositiveBlob {
+        self.blob
     }
 }
 
 fn merge_indexes(key: &[u8], existing_value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
-    if key != b"data-index" { panic!("The merge operator only allow \"data-index\" merging") }
+    if key != b"data-index" { panic!("The merge operator only supports \"data-index\" merging") }
 
-    let mut merge_builder = MergeBuilder::new();
-
-    if let Some(existing_value) = existing_value {
-        let base: PositiveBlob = bincode::deserialize(existing_value).unwrap(); // FIXME what do we do here ?
-        merge_builder.push(Blob::Positive(base));
-    }
+    let mut merge = match existing_value {
+        Some(existing_value) => {
+            let blob = bincode::deserialize(existing_value).expect("BUG: could not deserialize data-index");
+            Merge::new(blob)
+        },
+        None => Merge::default(),
+    };
 
     for bytes in operands {
-        let blob: Blob = bincode::deserialize(bytes).unwrap();
-        merge_builder.push(blob);
+        let blob = bincode::deserialize(bytes).expect("BUG: could not deserialize blobs");
+        merge.merge(blob);
     }
 
-    let blob = merge_builder.merge();
-    // blob.to_vec()
-    unimplemented!()
+    let blob = merge.build();
+    bincode::serialize(&blob).expect("BUG: could not serialize merged blob")
 }
 
 pub struct Index {
@@ -95,7 +182,7 @@ impl Index {
         opts.create_if_missing(true);
 
         let mut cf_opts = rocksdb_options::ColumnFamilyOptions::new();
-        cf_opts.add_merge_operator("blobs order operator", simple_vec_append);
+        cf_opts.add_merge_operator("data-index merge operator", merge_indexes);
 
         let database = rocksdb::DB::open_cf(opts, &path, vec![("default", cf_opts)])?;
 
@@ -114,7 +201,7 @@ impl Index {
        opts.create_if_missing(false);
 
         let mut cf_opts = rocksdb_options::ColumnFamilyOptions::new();
-        cf_opts.add_merge_operator("blobs order operator", simple_vec_append);
+        cf_opts.add_merge_operator("data-index merge operator", merge_indexes);
 
         let database = rocksdb::DB::open_cf(opts, &path, vec![("default", cf_opts)])?;
 
@@ -150,12 +237,9 @@ impl Index {
         // this snapshot will allow consistent reads for the whole search operation
         let snapshot = self.database.snapshot();
 
-        let data_key = Identifier::data().blobs_order().build();
-        let blobs = match snapshot.get(&data_key)? {
-            Some(value) => {
-                let blob_infos = BlobInfo::read_from_slice(&value)?;
-                blobs_from_blob_infos(&blob_infos, &snapshot)?
-            },
+        let index_key = Identifier::data().index().build();
+        let map = match snapshot.get(&index_key)? {
+            Some(value) => bincode::deserialize(&value)?,
             None => Vec::new(),
         };
 
@@ -166,7 +250,7 @@ impl Index {
         }
 
         let config = Config {
-            blobs: &blobs,
+            map: map,
             automatons: automatons,
             criteria: criterion::default(),
             distinct: ((), 1),

diff --git a/src/index/update/mod.rs b/src/index/update/mod.rs
index c6befe9f6..a53c44852 100644
--- a/src/index/update/mod.rs
+++ b/src/index/update/mod.rs
@@ -1,8 +1,6 @@
 use std::path::PathBuf;
 use std::error::Error;
 
-use ::rocksdb::rocksdb_options;
-
 use crate::blob::{BlobName, Sign};
 
 mod negative_update;

diff --git a/src/index/update/positive_update.rs b/src/index/update/positive_update.rs
index ad6e0f5d0..d8a5a3796 100644
--- a/src/index/update/positive_update.rs
+++ b/src/index/update/positive_update.rs
@@ -1,7 +1,6 @@
 use std::collections::BTreeMap;
 use std::path::PathBuf;
 use std::error::Error;
-use std::fmt::Write;
 
 use ::rocksdb::rocksdb_options;
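
Reviewer note: the short sketch below shows how the single "data-index" entry introduced by this patch is meant to be fed. It is illustrative only and not part of the patch: `push_blob` is a hypothetical helper, and it assumes the `Writable::merge` method already exposed by the rocksdb crate used here, a `DB` opened with the "data-index merge operator" registered (as in `Index::create` above), and that `Blob` serializes with bincode the way `merge_indexes` expects.

    use std::error::Error;

    use ::rocksdb::rocksdb::{DB, Writable};

    use crate::blob::Blob;
    use crate::index::identifier::Identifier;

    // Hypothetical helper: every update is serialized as a whole Blob and
    // queued as a merge operand under the single "data-index" key. RocksDB
    // keeps the operands around and only runs `merge_indexes` on reads and
    // during compaction, folding positive blobs (union) and negative blobs
    // (difference by document id) into one compacted PositiveBlob.
    fn push_blob(db: &DB, blob: &Blob) -> Result<(), Box<Error>> {
        let index_key = Identifier::data().index().build();
        let bytes = bincode::serialize(blob)?;
        db.merge(&index_key, &bytes)?;
        Ok(())
    }

Compared with the previous "blobs-order" design, readers now deserialize one already-merged PositiveBlob instead of loading an ordered list of blobs and combining them at search time; the compaction cost is paid lazily inside the storage engine.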