mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-08 20:44:30 +01:00
feat: Use sdset Set primitives everywhere possible
This commit is contained in:
parent
06ba82cfa8
commit
6cb1bfd815
@ -4,6 +4,7 @@ use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::{io, mem};
|
||||
|
||||
use sdset::Set;
|
||||
use fst::raw::MmapReadOnly;
|
||||
use serde::ser::{Serialize, Serializer};
|
||||
|
||||
@ -42,11 +43,12 @@ impl DocIds {
|
||||
self.doc_ids().binary_search(&doc).is_ok()
|
||||
}
|
||||
|
||||
pub fn doc_ids(&self) -> &[DocumentId] {
|
||||
pub fn doc_ids(&self) -> &Set<DocumentId> {
|
||||
let slice = &self.data;
|
||||
let ptr = slice.as_ptr() as *const DocumentId;
|
||||
let len = slice.len() / mem::size_of::<DocumentId>();
|
||||
unsafe { from_raw_parts(ptr, len) }
|
||||
let slice = unsafe { from_raw_parts(ptr, len) };
|
||||
Set::new_unchecked(slice)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,8 +5,9 @@ use std::ops::Index;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use fst::raw::MmapReadOnly;
|
||||
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
|
||||
use fst::raw::MmapReadOnly;
|
||||
use sdset::Set;
|
||||
|
||||
use crate::DocIndex;
|
||||
use crate::data::Data;
|
||||
@ -64,11 +65,12 @@ impl DocIndexes {
|
||||
bytes
|
||||
}
|
||||
|
||||
pub fn get(&self, index: usize) -> Option<&[DocIndex]> {
|
||||
pub fn get(&self, index: usize) -> Option<&Set<DocIndex>> {
|
||||
self.ranges().get(index as usize).map(|Range { start, end }| {
|
||||
let start = *start as usize;
|
||||
let end = *end as usize;
|
||||
&self.indexes()[start..end]
|
||||
let slice = &self.indexes()[start..end];
|
||||
Set::new_unchecked(slice)
|
||||
})
|
||||
}
|
||||
|
||||
@ -117,7 +119,7 @@ impl<W: Write> DocIndexesBuilder<W> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, indexes: &[DocIndex]) -> io::Result<()> {
|
||||
pub fn insert(&mut self, indexes: &Set<DocIndex>) -> io::Result<()> {
|
||||
let len = indexes.len() as u64;
|
||||
let start = self.ranges.last().map(|r| r.end).unwrap_or(0);
|
||||
let range = Range { start, end: start + len };
|
||||
@ -164,16 +166,16 @@ mod tests {
|
||||
|
||||
let mut builder = DocIndexesBuilder::memory();
|
||||
|
||||
builder.insert(&[a])?;
|
||||
builder.insert(&[a, b, c])?;
|
||||
builder.insert(&[a, c])?;
|
||||
builder.insert(Set::new(&[a])?)?;
|
||||
builder.insert(Set::new(&[a, b, c])?)?;
|
||||
builder.insert(Set::new(&[a, c])?)?;
|
||||
|
||||
let bytes = builder.into_inner()?;
|
||||
let docs = DocIndexes::from_bytes(bytes)?;
|
||||
|
||||
assert_eq!(docs.get(0), Some(&[a][..]));
|
||||
assert_eq!(docs.get(1), Some(&[a, b, c][..]));
|
||||
assert_eq!(docs.get(2), Some(&[a, c][..]));
|
||||
assert_eq!(docs.get(0), Some(Set::new(&[a])?));
|
||||
assert_eq!(docs.get(1), Some(Set::new(&[a, b, c])?));
|
||||
assert_eq!(docs.get(2), Some(Set::new(&[a, c])?));
|
||||
assert_eq!(docs.get(3), None);
|
||||
|
||||
Ok(())
|
||||
@ -187,9 +189,9 @@ mod tests {
|
||||
|
||||
let mut builder = DocIndexesBuilder::memory();
|
||||
|
||||
builder.insert(&[a])?;
|
||||
builder.insert(&[a, b, c])?;
|
||||
builder.insert(&[a, c])?;
|
||||
builder.insert(Set::new(&[a])?)?;
|
||||
builder.insert(Set::new(&[a, b, c])?)?;
|
||||
builder.insert(Set::new(&[a, c])?)?;
|
||||
|
||||
let builder_bytes = builder.into_inner()?;
|
||||
let docs = DocIndexes::from_bytes(builder_bytes.clone())?;
|
||||
|
@ -2,6 +2,7 @@ use std::error::Error;
|
||||
use std::path::Path;
|
||||
use std::fmt;
|
||||
|
||||
use sdset::Set;
|
||||
use serde::de::{self, Deserialize, Deserializer};
|
||||
use serde::ser::{Serialize, Serializer};
|
||||
use crate::data::DocIds;
|
||||
@ -38,8 +39,8 @@ impl NegativeBlob {
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<[DocumentId]> for NegativeBlob {
|
||||
fn as_ref(&self) -> &[DocumentId] {
|
||||
impl AsRef<Set<DocumentId>> for NegativeBlob {
|
||||
fn as_ref(&self) -> &Set<DocumentId> {
|
||||
self.as_ids().doc_ids()
|
||||
}
|
||||
}
|
||||
@ -47,7 +48,7 @@ impl AsRef<[DocumentId]> for NegativeBlob {
|
||||
impl fmt::Debug for NegativeBlob {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "NegativeBlob(")?;
|
||||
f.debug_list().entries(self.as_ref()).finish()?;
|
||||
f.debug_list().entries(self.as_ref().as_slice()).finish()?;
|
||||
write!(f, ")")
|
||||
}
|
||||
}
|
||||
|
@ -1,9 +1,9 @@
|
||||
use std::error::Error;
|
||||
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use group_by::GroupBy;
|
||||
use sdset::duo::DifferenceByKey;
|
||||
use sdset::{Set, SetOperation};
|
||||
use group_by::GroupBy;
|
||||
|
||||
use crate::database::blob::{Blob, Sign, PositiveBlob, PositiveBlobBuilder, NegativeBlob};
|
||||
use crate::database::blob::{positive, negative};
|
||||
@ -89,18 +89,16 @@ impl OpBuilder {
|
||||
};
|
||||
|
||||
let mut builder = PositiveBlobBuilder::memory();
|
||||
let doc_ids = Set::new_unchecked(negative.as_ref());
|
||||
|
||||
let op_builder = positive::OpBuilder::new().add(&base).add(&positive);
|
||||
let mut stream = op_builder.union().into_stream();
|
||||
while let Some((input, doc_indexes)) = stream.next() {
|
||||
let doc_indexes = Set::new_unchecked(doc_indexes);
|
||||
let op = DifferenceByKey::new(doc_indexes, doc_ids, |x| x.document_id, |x| *x);
|
||||
let op = DifferenceByKey::new(doc_indexes, negative.as_ref(), |x| x.document_id, |x| *x);
|
||||
|
||||
buffer.clear();
|
||||
op.extend_vec(&mut buffer);
|
||||
if !buffer.is_empty() {
|
||||
builder.insert(input, &buffer)?;
|
||||
builder.insert(input, Set::new_unchecked(&buffer))?;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4,6 +4,7 @@ use std::path::Path;
|
||||
use std::error::Error;
|
||||
|
||||
use fst::{map, Map, Streamer, IntoStreamer};
|
||||
use sdset::Set;
|
||||
|
||||
use crate::DocIndex;
|
||||
use crate::data::{DocIndexes, DocIndexesBuilder};
|
||||
@ -177,7 +178,7 @@ impl<W: Write, X: Write> PositiveBlobBuilder<W, X> {
|
||||
/// then an error is returned. Similarly, if there was a problem writing
|
||||
/// to the underlying writer, an error is returned.
|
||||
// FIXME what if one write doesn't work but the other do ?
|
||||
pub fn insert<K>(&mut self, key: K, doc_indexes: &[DocIndex]) -> Result<(), Box<Error>>
|
||||
pub fn insert<K>(&mut self, key: K, doc_indexes: &Set<DocIndex>) -> Result<(), Box<Error>>
|
||||
where K: AsRef<[u8]>,
|
||||
{
|
||||
self.map.insert(key, self.value)?;
|
||||
@ -210,9 +211,9 @@ mod tests {
|
||||
|
||||
let mut builder = PositiveBlobBuilder::memory();
|
||||
|
||||
builder.insert("aaa", &[a])?;
|
||||
builder.insert("aab", &[a, b, c])?;
|
||||
builder.insert("aac", &[a, c])?;
|
||||
builder.insert("aaa", Set::new(&[a])?)?;
|
||||
builder.insert("aab", Set::new(&[a, b, c])?)?;
|
||||
builder.insert("aac", Set::new(&[a, c])?)?;
|
||||
|
||||
let (map_bytes, indexes_bytes) = builder.into_inner()?;
|
||||
let positive_blob = PositiveBlob::from_bytes(map_bytes, indexes_bytes)?;
|
||||
@ -233,9 +234,9 @@ mod tests {
|
||||
|
||||
let mut builder = PositiveBlobBuilder::memory();
|
||||
|
||||
builder.insert("aaa", &[a])?;
|
||||
builder.insert("aab", &[a, b, c])?;
|
||||
builder.insert("aac", &[a, c])?;
|
||||
builder.insert("aaa", Set::new(&[a])?)?;
|
||||
builder.insert("aab", Set::new(&[a, b, c])?)?;
|
||||
builder.insert("aac", Set::new(&[a, c])?)?;
|
||||
|
||||
let (map_bytes, indexes_bytes) = builder.into_inner()?;
|
||||
let positive_blob = PositiveBlob::from_bytes(map_bytes, indexes_bytes)?;
|
||||
|
@ -74,7 +74,7 @@ impl<'m> $name<'m> {
|
||||
}
|
||||
|
||||
impl<'m, 'a> fst::Streamer<'a> for $name<'m> {
|
||||
type Item = (&'a [u8], &'a [DocIndex]);
|
||||
type Item = (&'a [u8], &'a Set<DocIndex>);
|
||||
|
||||
fn next(&'a mut self) -> Option<Self::Item> {
|
||||
// loop {
|
||||
@ -114,7 +114,7 @@ impl<'m, 'a> fst::Streamer<'a> for $name<'m> {
|
||||
builder.$operation().extend_vec(&mut self.outs);
|
||||
|
||||
if self.outs.is_empty() { return None }
|
||||
return Some((input, &self.outs))
|
||||
return Some((input, Set::new_unchecked(&self.outs)))
|
||||
},
|
||||
None => None
|
||||
}
|
||||
|
@ -48,9 +48,9 @@ impl NegativeUpdateBuilder {
|
||||
Blob::Positive(_) => unreachable!(),
|
||||
};
|
||||
|
||||
for &document_id in negative_blob.as_ref() {
|
||||
for &document_id in negative_blob.as_ref().as_slice() {
|
||||
let start = DocumentKey::new(document_id);
|
||||
let end = DocumentKey::new(document_id + 1);
|
||||
let end = start.with_attribute_max();
|
||||
file_writer.delete_range(start.as_ref(), end.as_ref())?;
|
||||
}
|
||||
|
||||
|
@ -4,6 +4,8 @@ use std::collections::BTreeMap;
|
||||
use std::error::Error;
|
||||
use std::io::Write;
|
||||
|
||||
use sdset::Set;
|
||||
|
||||
use crate::database::blob::positive::PositiveBlobBuilder;
|
||||
use crate::DocIndex;
|
||||
|
||||
@ -40,7 +42,7 @@ impl<W: Write, X: Write> UnorderedPositiveBlobBuilder<W, X> {
|
||||
pub fn into_inner(mut self) -> Result<(W, X), Box<Error>> {
|
||||
for (key, mut doc_indexes) in self.map {
|
||||
doc_indexes.sort_unstable();
|
||||
self.builder.insert(&key, &doc_indexes)?;
|
||||
self.builder.insert(&key, Set::new_unchecked(&doc_indexes))?;
|
||||
}
|
||||
self.builder.into_inner()
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user