feat: Use sdset Set primitives everywhere possible

This commit is contained in:
Clément Renault 2018-12-09 14:18:23 +01:00
parent 06ba82cfa8
commit 6cb1bfd815
No known key found for this signature in database
GPG Key ID: 0151CDAB43460DAE
8 changed files with 41 additions and 35 deletions

View File

@ -4,6 +4,7 @@ use std::path::Path;
use std::sync::Arc;
use std::{io, mem};
use sdset::Set;
use fst::raw::MmapReadOnly;
use serde::ser::{Serialize, Serializer};
@ -42,11 +43,12 @@ impl DocIds {
self.doc_ids().binary_search(&doc).is_ok()
}
pub fn doc_ids(&self) -> &[DocumentId] {
pub fn doc_ids(&self) -> &Set<DocumentId> {
let slice = &self.data;
let ptr = slice.as_ptr() as *const DocumentId;
let len = slice.len() / mem::size_of::<DocumentId>();
unsafe { from_raw_parts(ptr, len) }
let slice = unsafe { from_raw_parts(ptr, len) };
Set::new_unchecked(slice)
}
}

View File

@ -5,8 +5,9 @@ use std::ops::Index;
use std::path::Path;
use std::sync::Arc;
use fst::raw::MmapReadOnly;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use fst::raw::MmapReadOnly;
use sdset::Set;
use crate::DocIndex;
use crate::data::Data;
@ -64,11 +65,12 @@ impl DocIndexes {
bytes
}
pub fn get(&self, index: usize) -> Option<&[DocIndex]> {
pub fn get(&self, index: usize) -> Option<&Set<DocIndex>> {
self.ranges().get(index as usize).map(|Range { start, end }| {
let start = *start as usize;
let end = *end as usize;
&self.indexes()[start..end]
let slice = &self.indexes()[start..end];
Set::new_unchecked(slice)
})
}
@ -117,7 +119,7 @@ impl<W: Write> DocIndexesBuilder<W> {
}
}
pub fn insert(&mut self, indexes: &[DocIndex]) -> io::Result<()> {
pub fn insert(&mut self, indexes: &Set<DocIndex>) -> io::Result<()> {
let len = indexes.len() as u64;
let start = self.ranges.last().map(|r| r.end).unwrap_or(0);
let range = Range { start, end: start + len };
@ -164,16 +166,16 @@ mod tests {
let mut builder = DocIndexesBuilder::memory();
builder.insert(&[a])?;
builder.insert(&[a, b, c])?;
builder.insert(&[a, c])?;
builder.insert(Set::new(&[a])?)?;
builder.insert(Set::new(&[a, b, c])?)?;
builder.insert(Set::new(&[a, c])?)?;
let bytes = builder.into_inner()?;
let docs = DocIndexes::from_bytes(bytes)?;
assert_eq!(docs.get(0), Some(&[a][..]));
assert_eq!(docs.get(1), Some(&[a, b, c][..]));
assert_eq!(docs.get(2), Some(&[a, c][..]));
assert_eq!(docs.get(0), Some(Set::new(&[a])?));
assert_eq!(docs.get(1), Some(Set::new(&[a, b, c])?));
assert_eq!(docs.get(2), Some(Set::new(&[a, c])?));
assert_eq!(docs.get(3), None);
Ok(())
@ -187,9 +189,9 @@ mod tests {
let mut builder = DocIndexesBuilder::memory();
builder.insert(&[a])?;
builder.insert(&[a, b, c])?;
builder.insert(&[a, c])?;
builder.insert(Set::new(&[a])?)?;
builder.insert(Set::new(&[a, b, c])?)?;
builder.insert(Set::new(&[a, c])?)?;
let builder_bytes = builder.into_inner()?;
let docs = DocIndexes::from_bytes(builder_bytes.clone())?;

View File

@ -2,6 +2,7 @@ use std::error::Error;
use std::path::Path;
use std::fmt;
use sdset::Set;
use serde::de::{self, Deserialize, Deserializer};
use serde::ser::{Serialize, Serializer};
use crate::data::DocIds;
@ -38,8 +39,8 @@ impl NegativeBlob {
}
}
impl AsRef<[DocumentId]> for NegativeBlob {
fn as_ref(&self) -> &[DocumentId] {
impl AsRef<Set<DocumentId>> for NegativeBlob {
fn as_ref(&self) -> &Set<DocumentId> {
self.as_ids().doc_ids()
}
}
@ -47,7 +48,7 @@ impl AsRef<[DocumentId]> for NegativeBlob {
impl fmt::Debug for NegativeBlob {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "NegativeBlob(")?;
f.debug_list().entries(self.as_ref()).finish()?;
f.debug_list().entries(self.as_ref().as_slice()).finish()?;
write!(f, ")")
}
}

View File

@ -1,9 +1,9 @@
use std::error::Error;
use fst::{IntoStreamer, Streamer};
use group_by::GroupBy;
use sdset::duo::DifferenceByKey;
use sdset::{Set, SetOperation};
use group_by::GroupBy;
use crate::database::blob::{Blob, Sign, PositiveBlob, PositiveBlobBuilder, NegativeBlob};
use crate::database::blob::{positive, negative};
@ -89,18 +89,16 @@ impl OpBuilder {
};
let mut builder = PositiveBlobBuilder::memory();
let doc_ids = Set::new_unchecked(negative.as_ref());
let op_builder = positive::OpBuilder::new().add(&base).add(&positive);
let mut stream = op_builder.union().into_stream();
while let Some((input, doc_indexes)) = stream.next() {
let doc_indexes = Set::new_unchecked(doc_indexes);
let op = DifferenceByKey::new(doc_indexes, doc_ids, |x| x.document_id, |x| *x);
let op = DifferenceByKey::new(doc_indexes, negative.as_ref(), |x| x.document_id, |x| *x);
buffer.clear();
op.extend_vec(&mut buffer);
if !buffer.is_empty() {
builder.insert(input, &buffer)?;
builder.insert(input, Set::new_unchecked(&buffer))?;
}
}

View File

@ -4,6 +4,7 @@ use std::path::Path;
use std::error::Error;
use fst::{map, Map, Streamer, IntoStreamer};
use sdset::Set;
use crate::DocIndex;
use crate::data::{DocIndexes, DocIndexesBuilder};
@ -177,7 +178,7 @@ impl<W: Write, X: Write> PositiveBlobBuilder<W, X> {
/// then an error is returned. Similarly, if there was a problem writing
/// to the underlying writer, an error is returned.
// FIXME: what if one write fails but the other succeeds?
pub fn insert<K>(&mut self, key: K, doc_indexes: &[DocIndex]) -> Result<(), Box<Error>>
pub fn insert<K>(&mut self, key: K, doc_indexes: &Set<DocIndex>) -> Result<(), Box<Error>>
where K: AsRef<[u8]>,
{
self.map.insert(key, self.value)?;
@ -210,9 +211,9 @@ mod tests {
let mut builder = PositiveBlobBuilder::memory();
builder.insert("aaa", &[a])?;
builder.insert("aab", &[a, b, c])?;
builder.insert("aac", &[a, c])?;
builder.insert("aaa", Set::new(&[a])?)?;
builder.insert("aab", Set::new(&[a, b, c])?)?;
builder.insert("aac", Set::new(&[a, c])?)?;
let (map_bytes, indexes_bytes) = builder.into_inner()?;
let positive_blob = PositiveBlob::from_bytes(map_bytes, indexes_bytes)?;
@ -233,9 +234,9 @@ mod tests {
let mut builder = PositiveBlobBuilder::memory();
builder.insert("aaa", &[a])?;
builder.insert("aab", &[a, b, c])?;
builder.insert("aac", &[a, c])?;
builder.insert("aaa", Set::new(&[a])?)?;
builder.insert("aab", Set::new(&[a, b, c])?)?;
builder.insert("aac", Set::new(&[a, c])?)?;
let (map_bytes, indexes_bytes) = builder.into_inner()?;
let positive_blob = PositiveBlob::from_bytes(map_bytes, indexes_bytes)?;

View File

@ -74,7 +74,7 @@ impl<'m> $name<'m> {
}
impl<'m, 'a> fst::Streamer<'a> for $name<'m> {
type Item = (&'a [u8], &'a [DocIndex]);
type Item = (&'a [u8], &'a Set<DocIndex>);
fn next(&'a mut self) -> Option<Self::Item> {
// loop {
@ -114,7 +114,7 @@ impl<'m, 'a> fst::Streamer<'a> for $name<'m> {
builder.$operation().extend_vec(&mut self.outs);
if self.outs.is_empty() { return None }
return Some((input, &self.outs))
return Some((input, Set::new_unchecked(&self.outs)))
},
None => None
}

View File

@ -48,9 +48,9 @@ impl NegativeUpdateBuilder {
Blob::Positive(_) => unreachable!(),
};
for &document_id in negative_blob.as_ref() {
for &document_id in negative_blob.as_ref().as_slice() {
let start = DocumentKey::new(document_id);
let end = DocumentKey::new(document_id + 1);
let end = start.with_attribute_max();
file_writer.delete_range(start.as_ref(), end.as_ref())?;
}

View File

@ -4,6 +4,8 @@ use std::collections::BTreeMap;
use std::error::Error;
use std::io::Write;
use sdset::Set;
use crate::database::blob::positive::PositiveBlobBuilder;
use crate::DocIndex;
@ -40,7 +42,7 @@ impl<W: Write, X: Write> UnorderedPositiveBlobBuilder<W, X> {
pub fn into_inner(mut self) -> Result<(W, X), Box<Error>> {
for (key, mut doc_indexes) in self.map {
doc_indexes.sort_unstable();
self.builder.insert(&key, &doc_indexes)?;
self.builder.insert(&key, Set::new_unchecked(&doc_indexes))?;
}
self.builder.into_inner()
}