mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 00:34:26 +01:00
feat: Use sdset Set primitives everywhere possible
This commit is contained in:
parent
06ba82cfa8
commit
6cb1bfd815
@ -4,6 +4,7 @@ use std::path::Path;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::{io, mem};
|
use std::{io, mem};
|
||||||
|
|
||||||
|
use sdset::Set;
|
||||||
use fst::raw::MmapReadOnly;
|
use fst::raw::MmapReadOnly;
|
||||||
use serde::ser::{Serialize, Serializer};
|
use serde::ser::{Serialize, Serializer};
|
||||||
|
|
||||||
@ -42,11 +43,12 @@ impl DocIds {
|
|||||||
self.doc_ids().binary_search(&doc).is_ok()
|
self.doc_ids().binary_search(&doc).is_ok()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn doc_ids(&self) -> &[DocumentId] {
|
pub fn doc_ids(&self) -> &Set<DocumentId> {
|
||||||
let slice = &self.data;
|
let slice = &self.data;
|
||||||
let ptr = slice.as_ptr() as *const DocumentId;
|
let ptr = slice.as_ptr() as *const DocumentId;
|
||||||
let len = slice.len() / mem::size_of::<DocumentId>();
|
let len = slice.len() / mem::size_of::<DocumentId>();
|
||||||
unsafe { from_raw_parts(ptr, len) }
|
let slice = unsafe { from_raw_parts(ptr, len) };
|
||||||
|
Set::new_unchecked(slice)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,8 +5,9 @@ use std::ops::Index;
|
|||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use fst::raw::MmapReadOnly;
|
|
||||||
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
|
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
|
||||||
|
use fst::raw::MmapReadOnly;
|
||||||
|
use sdset::Set;
|
||||||
|
|
||||||
use crate::DocIndex;
|
use crate::DocIndex;
|
||||||
use crate::data::Data;
|
use crate::data::Data;
|
||||||
@ -64,11 +65,12 @@ impl DocIndexes {
|
|||||||
bytes
|
bytes
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get(&self, index: usize) -> Option<&[DocIndex]> {
|
pub fn get(&self, index: usize) -> Option<&Set<DocIndex>> {
|
||||||
self.ranges().get(index as usize).map(|Range { start, end }| {
|
self.ranges().get(index as usize).map(|Range { start, end }| {
|
||||||
let start = *start as usize;
|
let start = *start as usize;
|
||||||
let end = *end as usize;
|
let end = *end as usize;
|
||||||
&self.indexes()[start..end]
|
let slice = &self.indexes()[start..end];
|
||||||
|
Set::new_unchecked(slice)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -117,7 +119,7 @@ impl<W: Write> DocIndexesBuilder<W> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn insert(&mut self, indexes: &[DocIndex]) -> io::Result<()> {
|
pub fn insert(&mut self, indexes: &Set<DocIndex>) -> io::Result<()> {
|
||||||
let len = indexes.len() as u64;
|
let len = indexes.len() as u64;
|
||||||
let start = self.ranges.last().map(|r| r.end).unwrap_or(0);
|
let start = self.ranges.last().map(|r| r.end).unwrap_or(0);
|
||||||
let range = Range { start, end: start + len };
|
let range = Range { start, end: start + len };
|
||||||
@ -164,16 +166,16 @@ mod tests {
|
|||||||
|
|
||||||
let mut builder = DocIndexesBuilder::memory();
|
let mut builder = DocIndexesBuilder::memory();
|
||||||
|
|
||||||
builder.insert(&[a])?;
|
builder.insert(Set::new(&[a])?)?;
|
||||||
builder.insert(&[a, b, c])?;
|
builder.insert(Set::new(&[a, b, c])?)?;
|
||||||
builder.insert(&[a, c])?;
|
builder.insert(Set::new(&[a, c])?)?;
|
||||||
|
|
||||||
let bytes = builder.into_inner()?;
|
let bytes = builder.into_inner()?;
|
||||||
let docs = DocIndexes::from_bytes(bytes)?;
|
let docs = DocIndexes::from_bytes(bytes)?;
|
||||||
|
|
||||||
assert_eq!(docs.get(0), Some(&[a][..]));
|
assert_eq!(docs.get(0), Some(Set::new(&[a])?));
|
||||||
assert_eq!(docs.get(1), Some(&[a, b, c][..]));
|
assert_eq!(docs.get(1), Some(Set::new(&[a, b, c])?));
|
||||||
assert_eq!(docs.get(2), Some(&[a, c][..]));
|
assert_eq!(docs.get(2), Some(Set::new(&[a, c])?));
|
||||||
assert_eq!(docs.get(3), None);
|
assert_eq!(docs.get(3), None);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -187,9 +189,9 @@ mod tests {
|
|||||||
|
|
||||||
let mut builder = DocIndexesBuilder::memory();
|
let mut builder = DocIndexesBuilder::memory();
|
||||||
|
|
||||||
builder.insert(&[a])?;
|
builder.insert(Set::new(&[a])?)?;
|
||||||
builder.insert(&[a, b, c])?;
|
builder.insert(Set::new(&[a, b, c])?)?;
|
||||||
builder.insert(&[a, c])?;
|
builder.insert(Set::new(&[a, c])?)?;
|
||||||
|
|
||||||
let builder_bytes = builder.into_inner()?;
|
let builder_bytes = builder.into_inner()?;
|
||||||
let docs = DocIndexes::from_bytes(builder_bytes.clone())?;
|
let docs = DocIndexes::from_bytes(builder_bytes.clone())?;
|
||||||
|
@ -2,6 +2,7 @@ use std::error::Error;
|
|||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
|
use sdset::Set;
|
||||||
use serde::de::{self, Deserialize, Deserializer};
|
use serde::de::{self, Deserialize, Deserializer};
|
||||||
use serde::ser::{Serialize, Serializer};
|
use serde::ser::{Serialize, Serializer};
|
||||||
use crate::data::DocIds;
|
use crate::data::DocIds;
|
||||||
@ -38,8 +39,8 @@ impl NegativeBlob {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AsRef<[DocumentId]> for NegativeBlob {
|
impl AsRef<Set<DocumentId>> for NegativeBlob {
|
||||||
fn as_ref(&self) -> &[DocumentId] {
|
fn as_ref(&self) -> &Set<DocumentId> {
|
||||||
self.as_ids().doc_ids()
|
self.as_ids().doc_ids()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -47,7 +48,7 @@ impl AsRef<[DocumentId]> for NegativeBlob {
|
|||||||
impl fmt::Debug for NegativeBlob {
|
impl fmt::Debug for NegativeBlob {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
write!(f, "NegativeBlob(")?;
|
write!(f, "NegativeBlob(")?;
|
||||||
f.debug_list().entries(self.as_ref()).finish()?;
|
f.debug_list().entries(self.as_ref().as_slice()).finish()?;
|
||||||
write!(f, ")")
|
write!(f, ")")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
|
|
||||||
use fst::{IntoStreamer, Streamer};
|
use fst::{IntoStreamer, Streamer};
|
||||||
use group_by::GroupBy;
|
|
||||||
use sdset::duo::DifferenceByKey;
|
use sdset::duo::DifferenceByKey;
|
||||||
use sdset::{Set, SetOperation};
|
use sdset::{Set, SetOperation};
|
||||||
|
use group_by::GroupBy;
|
||||||
|
|
||||||
use crate::database::blob::{Blob, Sign, PositiveBlob, PositiveBlobBuilder, NegativeBlob};
|
use crate::database::blob::{Blob, Sign, PositiveBlob, PositiveBlobBuilder, NegativeBlob};
|
||||||
use crate::database::blob::{positive, negative};
|
use crate::database::blob::{positive, negative};
|
||||||
@ -89,18 +89,16 @@ impl OpBuilder {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let mut builder = PositiveBlobBuilder::memory();
|
let mut builder = PositiveBlobBuilder::memory();
|
||||||
let doc_ids = Set::new_unchecked(negative.as_ref());
|
|
||||||
|
|
||||||
let op_builder = positive::OpBuilder::new().add(&base).add(&positive);
|
let op_builder = positive::OpBuilder::new().add(&base).add(&positive);
|
||||||
let mut stream = op_builder.union().into_stream();
|
let mut stream = op_builder.union().into_stream();
|
||||||
while let Some((input, doc_indexes)) = stream.next() {
|
while let Some((input, doc_indexes)) = stream.next() {
|
||||||
let doc_indexes = Set::new_unchecked(doc_indexes);
|
let op = DifferenceByKey::new(doc_indexes, negative.as_ref(), |x| x.document_id, |x| *x);
|
||||||
let op = DifferenceByKey::new(doc_indexes, doc_ids, |x| x.document_id, |x| *x);
|
|
||||||
|
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
op.extend_vec(&mut buffer);
|
op.extend_vec(&mut buffer);
|
||||||
if !buffer.is_empty() {
|
if !buffer.is_empty() {
|
||||||
builder.insert(input, &buffer)?;
|
builder.insert(input, Set::new_unchecked(&buffer))?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@ use std::path::Path;
|
|||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
|
|
||||||
use fst::{map, Map, Streamer, IntoStreamer};
|
use fst::{map, Map, Streamer, IntoStreamer};
|
||||||
|
use sdset::Set;
|
||||||
|
|
||||||
use crate::DocIndex;
|
use crate::DocIndex;
|
||||||
use crate::data::{DocIndexes, DocIndexesBuilder};
|
use crate::data::{DocIndexes, DocIndexesBuilder};
|
||||||
@ -177,7 +178,7 @@ impl<W: Write, X: Write> PositiveBlobBuilder<W, X> {
|
|||||||
/// then an error is returned. Similarly, if there was a problem writing
|
/// then an error is returned. Similarly, if there was a problem writing
|
||||||
/// to the underlying writer, an error is returned.
|
/// to the underlying writer, an error is returned.
|
||||||
// FIXME what if one write doesn't work but the other do ?
|
// FIXME what if one write doesn't work but the other do ?
|
||||||
pub fn insert<K>(&mut self, key: K, doc_indexes: &[DocIndex]) -> Result<(), Box<Error>>
|
pub fn insert<K>(&mut self, key: K, doc_indexes: &Set<DocIndex>) -> Result<(), Box<Error>>
|
||||||
where K: AsRef<[u8]>,
|
where K: AsRef<[u8]>,
|
||||||
{
|
{
|
||||||
self.map.insert(key, self.value)?;
|
self.map.insert(key, self.value)?;
|
||||||
@ -210,9 +211,9 @@ mod tests {
|
|||||||
|
|
||||||
let mut builder = PositiveBlobBuilder::memory();
|
let mut builder = PositiveBlobBuilder::memory();
|
||||||
|
|
||||||
builder.insert("aaa", &[a])?;
|
builder.insert("aaa", Set::new(&[a])?)?;
|
||||||
builder.insert("aab", &[a, b, c])?;
|
builder.insert("aab", Set::new(&[a, b, c])?)?;
|
||||||
builder.insert("aac", &[a, c])?;
|
builder.insert("aac", Set::new(&[a, c])?)?;
|
||||||
|
|
||||||
let (map_bytes, indexes_bytes) = builder.into_inner()?;
|
let (map_bytes, indexes_bytes) = builder.into_inner()?;
|
||||||
let positive_blob = PositiveBlob::from_bytes(map_bytes, indexes_bytes)?;
|
let positive_blob = PositiveBlob::from_bytes(map_bytes, indexes_bytes)?;
|
||||||
@ -233,9 +234,9 @@ mod tests {
|
|||||||
|
|
||||||
let mut builder = PositiveBlobBuilder::memory();
|
let mut builder = PositiveBlobBuilder::memory();
|
||||||
|
|
||||||
builder.insert("aaa", &[a])?;
|
builder.insert("aaa", Set::new(&[a])?)?;
|
||||||
builder.insert("aab", &[a, b, c])?;
|
builder.insert("aab", Set::new(&[a, b, c])?)?;
|
||||||
builder.insert("aac", &[a, c])?;
|
builder.insert("aac", Set::new(&[a, c])?)?;
|
||||||
|
|
||||||
let (map_bytes, indexes_bytes) = builder.into_inner()?;
|
let (map_bytes, indexes_bytes) = builder.into_inner()?;
|
||||||
let positive_blob = PositiveBlob::from_bytes(map_bytes, indexes_bytes)?;
|
let positive_blob = PositiveBlob::from_bytes(map_bytes, indexes_bytes)?;
|
||||||
|
@ -74,7 +74,7 @@ impl<'m> $name<'m> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'m, 'a> fst::Streamer<'a> for $name<'m> {
|
impl<'m, 'a> fst::Streamer<'a> for $name<'m> {
|
||||||
type Item = (&'a [u8], &'a [DocIndex]);
|
type Item = (&'a [u8], &'a Set<DocIndex>);
|
||||||
|
|
||||||
fn next(&'a mut self) -> Option<Self::Item> {
|
fn next(&'a mut self) -> Option<Self::Item> {
|
||||||
// loop {
|
// loop {
|
||||||
@ -114,7 +114,7 @@ impl<'m, 'a> fst::Streamer<'a> for $name<'m> {
|
|||||||
builder.$operation().extend_vec(&mut self.outs);
|
builder.$operation().extend_vec(&mut self.outs);
|
||||||
|
|
||||||
if self.outs.is_empty() { return None }
|
if self.outs.is_empty() { return None }
|
||||||
return Some((input, &self.outs))
|
return Some((input, Set::new_unchecked(&self.outs)))
|
||||||
},
|
},
|
||||||
None => None
|
None => None
|
||||||
}
|
}
|
||||||
|
@ -48,9 +48,9 @@ impl NegativeUpdateBuilder {
|
|||||||
Blob::Positive(_) => unreachable!(),
|
Blob::Positive(_) => unreachable!(),
|
||||||
};
|
};
|
||||||
|
|
||||||
for &document_id in negative_blob.as_ref() {
|
for &document_id in negative_blob.as_ref().as_slice() {
|
||||||
let start = DocumentKey::new(document_id);
|
let start = DocumentKey::new(document_id);
|
||||||
let end = DocumentKey::new(document_id + 1);
|
let end = start.with_attribute_max();
|
||||||
file_writer.delete_range(start.as_ref(), end.as_ref())?;
|
file_writer.delete_range(start.as_ref(), end.as_ref())?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,6 +4,8 @@ use std::collections::BTreeMap;
|
|||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
|
|
||||||
|
use sdset::Set;
|
||||||
|
|
||||||
use crate::database::blob::positive::PositiveBlobBuilder;
|
use crate::database::blob::positive::PositiveBlobBuilder;
|
||||||
use crate::DocIndex;
|
use crate::DocIndex;
|
||||||
|
|
||||||
@ -40,7 +42,7 @@ impl<W: Write, X: Write> UnorderedPositiveBlobBuilder<W, X> {
|
|||||||
pub fn into_inner(mut self) -> Result<(W, X), Box<Error>> {
|
pub fn into_inner(mut self) -> Result<(W, X), Box<Error>> {
|
||||||
for (key, mut doc_indexes) in self.map {
|
for (key, mut doc_indexes) in self.map {
|
||||||
doc_indexes.sort_unstable();
|
doc_indexes.sort_unstable();
|
||||||
self.builder.insert(&key, &doc_indexes)?;
|
self.builder.insert(&key, Set::new_unchecked(&doc_indexes))?;
|
||||||
}
|
}
|
||||||
self.builder.into_inner()
|
self.builder.into_inner()
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user