From 6cb1bfd8157ac15ef3d80fec29531adfa437fe06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 9 Dec 2018 14:18:23 +0100 Subject: [PATCH] feat: Use sdset Set primitives everywhere possible --- src/data/doc_ids.rs | 6 ++-- src/data/doc_indexes.rs | 28 ++++++++++--------- src/database/blob/negative/blob.rs | 7 +++-- src/database/blob/ops.rs | 8 ++---- src/database/blob/positive/blob.rs | 15 +++++----- src/database/blob/positive/ops.rs | 4 +-- src/database/update/negative/update.rs | 4 +-- .../update/positive/unordered_builder.rs | 4 ++- 8 files changed, 41 insertions(+), 35 deletions(-) diff --git a/src/data/doc_ids.rs b/src/data/doc_ids.rs index 7c8613744..11bc9fe75 100644 --- a/src/data/doc_ids.rs +++ b/src/data/doc_ids.rs @@ -4,6 +4,7 @@ use std::path::Path; use std::sync::Arc; use std::{io, mem}; +use sdset::Set; use fst::raw::MmapReadOnly; use serde::ser::{Serialize, Serializer}; @@ -42,11 +43,12 @@ impl DocIds { self.doc_ids().binary_search(&doc).is_ok() } - pub fn doc_ids(&self) -> &[DocumentId] { + pub fn doc_ids(&self) -> &Set { let slice = &self.data; let ptr = slice.as_ptr() as *const DocumentId; let len = slice.len() / mem::size_of::(); - unsafe { from_raw_parts(ptr, len) } + let slice = unsafe { from_raw_parts(ptr, len) }; + Set::new_unchecked(slice) } } diff --git a/src/data/doc_indexes.rs b/src/data/doc_indexes.rs index c7a73a149..832740e4e 100644 --- a/src/data/doc_indexes.rs +++ b/src/data/doc_indexes.rs @@ -5,8 +5,9 @@ use std::ops::Index; use std::path::Path; use std::sync::Arc; -use fst::raw::MmapReadOnly; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; +use fst::raw::MmapReadOnly; +use sdset::Set; use crate::DocIndex; use crate::data::Data; @@ -64,11 +65,12 @@ impl DocIndexes { bytes } - pub fn get(&self, index: usize) -> Option<&[DocIndex]> { + pub fn get(&self, index: usize) -> Option<&Set> { self.ranges().get(index as usize).map(|Range { start, end }| { let start = *start as usize; let end = *end as usize; - &self.indexes()[start..end] + let slice = &self.indexes()[start..end]; + Set::new_unchecked(slice) }) } @@ -117,7 +119,7 @@ impl DocIndexesBuilder { } } - pub fn insert(&mut self, indexes: &[DocIndex]) -> io::Result<()> { + pub fn insert(&mut self, indexes: &Set) -> io::Result<()> { let len = indexes.len() as u64; let start = self.ranges.last().map(|r| r.end).unwrap_or(0); let range = Range { start, end: start + len }; @@ -164,16 +166,16 @@ mod tests { let mut builder = DocIndexesBuilder::memory(); - builder.insert(&[a])?; - builder.insert(&[a, b, c])?; - builder.insert(&[a, c])?; + builder.insert(Set::new(&[a])?)?; + builder.insert(Set::new(&[a, b, c])?)?; + builder.insert(Set::new(&[a, c])?)?; let bytes = builder.into_inner()?; let docs = DocIndexes::from_bytes(bytes)?; - assert_eq!(docs.get(0), Some(&[a][..])); - assert_eq!(docs.get(1), Some(&[a, b, c][..])); - assert_eq!(docs.get(2), Some(&[a, c][..])); + assert_eq!(docs.get(0), Some(Set::new(&[a])?)); + assert_eq!(docs.get(1), Some(Set::new(&[a, b, c])?)); + assert_eq!(docs.get(2), Some(Set::new(&[a, c])?)); assert_eq!(docs.get(3), None); Ok(()) @@ -187,9 +189,9 @@ mod tests { let mut builder = DocIndexesBuilder::memory(); - builder.insert(&[a])?; - builder.insert(&[a, b, c])?; - builder.insert(&[a, c])?; + builder.insert(Set::new(&[a])?)?; + builder.insert(Set::new(&[a, b, c])?)?; + builder.insert(Set::new(&[a, c])?)?; let builder_bytes = builder.into_inner()?; let docs = DocIndexes::from_bytes(builder_bytes.clone())?; diff --git a/src/database/blob/negative/blob.rs b/src/database/blob/negative/blob.rs index 038c90cf2..04b655b55 100644 --- a/src/database/blob/negative/blob.rs +++ b/src/database/blob/negative/blob.rs @@ -2,6 +2,7 @@ use std::error::Error; use std::path::Path; use std::fmt; +use sdset::Set; use serde::de::{self, Deserialize, Deserializer}; use serde::ser::{Serialize, Serializer}; use crate::data::DocIds; @@ -38,8 +39,8 @@ impl NegativeBlob { } } -impl AsRef<[DocumentId]> for NegativeBlob { - fn as_ref(&self) -> &[DocumentId] { +impl AsRef> for NegativeBlob { + fn as_ref(&self) -> &Set { self.as_ids().doc_ids() } } @@ -47,7 +48,7 @@ impl AsRef<[DocumentId]> for NegativeBlob { impl fmt::Debug for NegativeBlob { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "NegativeBlob(")?; - f.debug_list().entries(self.as_ref()).finish()?; + f.debug_list().entries(self.as_ref().as_slice()).finish()?; write!(f, ")") } } diff --git a/src/database/blob/ops.rs b/src/database/blob/ops.rs index b752739f7..0aeea037d 100644 --- a/src/database/blob/ops.rs +++ b/src/database/blob/ops.rs @@ -1,9 +1,9 @@ use std::error::Error; use fst::{IntoStreamer, Streamer}; -use group_by::GroupBy; use sdset::duo::DifferenceByKey; use sdset::{Set, SetOperation}; +use group_by::GroupBy; use crate::database::blob::{Blob, Sign, PositiveBlob, PositiveBlobBuilder, NegativeBlob}; use crate::database::blob::{positive, negative}; @@ -89,18 +89,16 @@ impl OpBuilder { }; let mut builder = PositiveBlobBuilder::memory(); - let doc_ids = Set::new_unchecked(negative.as_ref()); let op_builder = positive::OpBuilder::new().add(&base).add(&positive); let mut stream = op_builder.union().into_stream(); while let Some((input, doc_indexes)) = stream.next() { - let doc_indexes = Set::new_unchecked(doc_indexes); - let op = DifferenceByKey::new(doc_indexes, doc_ids, |x| x.document_id, |x| *x); + let op = DifferenceByKey::new(doc_indexes, negative.as_ref(), |x| x.document_id, |x| *x); buffer.clear(); op.extend_vec(&mut buffer); if !buffer.is_empty() { - builder.insert(input, &buffer)?; + builder.insert(input, Set::new_unchecked(&buffer))?; } } diff --git a/src/database/blob/positive/blob.rs b/src/database/blob/positive/blob.rs index b58bee7a2..fd6f2251d 100644 --- a/src/database/blob/positive/blob.rs +++ b/src/database/blob/positive/blob.rs @@ -4,6 +4,7 @@ use std::path::Path; use std::error::Error; use fst::{map, Map, Streamer, IntoStreamer}; +use sdset::Set; use crate::DocIndex; use crate::data::{DocIndexes, DocIndexesBuilder}; @@ -177,7 +178,7 @@ impl PositiveBlobBuilder { /// then an error is returned. Similarly, if there was a problem writing /// to the underlying writer, an error is returned. // FIXME what if one write doesn't work but the other do ? - pub fn insert(&mut self, key: K, doc_indexes: &[DocIndex]) -> Result<(), Box> + pub fn insert(&mut self, key: K, doc_indexes: &Set) -> Result<(), Box> where K: AsRef<[u8]>, { self.map.insert(key, self.value)?; @@ -210,9 +211,9 @@ mod tests { let mut builder = PositiveBlobBuilder::memory(); - builder.insert("aaa", &[a])?; - builder.insert("aab", &[a, b, c])?; - builder.insert("aac", &[a, c])?; + builder.insert("aaa", Set::new(&[a])?)?; + builder.insert("aab", Set::new(&[a, b, c])?)?; + builder.insert("aac", Set::new(&[a, c])?)?; let (map_bytes, indexes_bytes) = builder.into_inner()?; let positive_blob = PositiveBlob::from_bytes(map_bytes, indexes_bytes)?; @@ -233,9 +234,9 @@ mod tests { let mut builder = PositiveBlobBuilder::memory(); - builder.insert("aaa", &[a])?; - builder.insert("aab", &[a, b, c])?; - builder.insert("aac", &[a, c])?; + builder.insert("aaa", Set::new(&[a])?)?; + builder.insert("aab", Set::new(&[a, b, c])?)?; + builder.insert("aac", Set::new(&[a, c])?)?; let (map_bytes, indexes_bytes) = builder.into_inner()?; let positive_blob = PositiveBlob::from_bytes(map_bytes, indexes_bytes)?; diff --git a/src/database/blob/positive/ops.rs b/src/database/blob/positive/ops.rs index e94ebdc0d..ffd66cead 100644 --- a/src/database/blob/positive/ops.rs +++ b/src/database/blob/positive/ops.rs @@ -74,7 +74,7 @@ impl<'m> $name<'m> { } impl<'m, 'a> fst::Streamer<'a> for $name<'m> { - type Item = (&'a [u8], &'a [DocIndex]); + type Item = (&'a [u8], &'a Set); fn next(&'a mut self) -> Option { // loop { @@ -114,7 +114,7 @@ impl<'m, 'a> fst::Streamer<'a> for $name<'m> { builder.$operation().extend_vec(&mut self.outs); if self.outs.is_empty() { return None } - return Some((input, &self.outs)) + return Some((input, Set::new_unchecked(&self.outs))) }, None => None } diff --git a/src/database/update/negative/update.rs b/src/database/update/negative/update.rs index 222a29a2c..3d4c4d061 100644 --- a/src/database/update/negative/update.rs +++ b/src/database/update/negative/update.rs @@ -48,9 +48,9 @@ impl NegativeUpdateBuilder { Blob::Positive(_) => unreachable!(), }; - for &document_id in negative_blob.as_ref() { + for &document_id in negative_blob.as_ref().as_slice() { let start = DocumentKey::new(document_id); - let end = DocumentKey::new(document_id + 1); + let end = start.with_attribute_max(); file_writer.delete_range(start.as_ref(), end.as_ref())?; } diff --git a/src/database/update/positive/unordered_builder.rs b/src/database/update/positive/unordered_builder.rs index 7a2139c98..6b9dc5abe 100644 --- a/src/database/update/positive/unordered_builder.rs +++ b/src/database/update/positive/unordered_builder.rs @@ -4,6 +4,8 @@ use std::collections::BTreeMap; use std::error::Error; use std::io::Write; +use sdset::Set; + use crate::database::blob::positive::PositiveBlobBuilder; use crate::DocIndex; @@ -40,7 +42,7 @@ impl UnorderedPositiveBlobBuilder { pub fn into_inner(mut self) -> Result<(W, X), Box> { for (key, mut doc_indexes) in self.map { doc_indexes.sort_unstable(); - self.builder.insert(&key, &doc_indexes)?; + self.builder.insert(&key, Set::new_unchecked(&doc_indexes))?; } self.builder.into_inner() }