diff --git a/meilidb-core/src/data/doc_indexes.rs b/meilidb-core/src/data/doc_indexes.rs deleted file mode 100644 index 2bb946745..000000000 --- a/meilidb-core/src/data/doc_indexes.rs +++ /dev/null @@ -1,231 +0,0 @@ -use std::io::{self, Write}; -use std::slice::from_raw_parts; -use std::mem::size_of; -use std::ops::Index; - -use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; -use sdset::Set; - -use crate::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor}; -use crate::write_to_bytes::WriteToBytes; -use crate::data::SharedData; -use crate::DocIndex; - -use super::into_u8_slice; - -#[derive(Debug)] -#[repr(C)] -struct Range { - start: u64, - end: u64, -} - -#[derive(Clone, Default)] -pub struct DocIndexes { - ranges: SharedData, - indexes: SharedData, -} - -impl DocIndexes { - pub fn get(&self, index: usize) -> Option<&Set> { - self.ranges().get(index).map(|Range { start, end }| { - let start = *start as usize; - let end = *end as usize; - let slice = &self.indexes()[start..end]; - Set::new_unchecked(slice) - }) - } - - fn ranges(&self) -> &[Range] { - let slice = &self.ranges; - let ptr = slice.as_ptr() as *const Range; - let len = slice.len() / size_of::(); - unsafe { from_raw_parts(ptr, len) } - } - - fn indexes(&self) -> &[DocIndex] { - let slice = &self.indexes; - let ptr = slice.as_ptr() as *const DocIndex; - let len = slice.len() / size_of::(); - unsafe { from_raw_parts(ptr, len) } - } -} - -impl Index for DocIndexes { - type Output = [DocIndex]; - - fn index(&self, index: usize) -> &Self::Output { - match self.get(index) { - Some(indexes) => indexes, - None => panic!("index {} out of range for a maximum of {} ranges", index, self.ranges().len()), - } - } -} - -impl FromSharedDataCursor for DocIndexes { - type Error = io::Error; - - fn from_shared_data_cursor(cursor: &mut SharedDataCursor) -> Result { - let len = cursor.read_u64::()? as usize; - let ranges = cursor.extract(len); - - let len = cursor.read_u64::()? as usize; - let indexes = cursor.extract(len); - - Ok(DocIndexes { ranges, indexes }) - } -} - -impl WriteToBytes for DocIndexes { - fn write_to_bytes(&self, bytes: &mut Vec) { - let ranges_len = self.ranges.len() as u64; - let _ = bytes.write_u64::(ranges_len); - bytes.extend_from_slice(&self.ranges); - - let indexes_len = self.indexes.len() as u64; - let _ = bytes.write_u64::(indexes_len); - bytes.extend_from_slice(&self.indexes); - } -} - -pub struct DocIndexesBuilder { - ranges: Vec, - indexes: Vec, - wtr: W, -} - -impl DocIndexesBuilder> { - pub fn memory() -> Self { - DocIndexesBuilder { - ranges: Vec::new(), - indexes: Vec::new(), - wtr: Vec::new(), - } - } -} - -impl DocIndexesBuilder { - pub fn new(wtr: W) -> Self { - DocIndexesBuilder { - ranges: Vec::new(), - indexes: Vec::new(), - wtr: wtr, - } - } - - pub fn insert(&mut self, indexes: &Set) { - let len = indexes.len() as u64; - let start = self.ranges.last().map(|r| r.end).unwrap_or(0); - let range = Range { start, end: start + len }; - self.ranges.push(range); - - self.indexes.extend_from_slice(indexes); - } - - pub fn finish(self) -> io::Result<()> { - self.into_inner().map(drop) - } - - pub fn into_inner(mut self) -> io::Result { - let ranges = unsafe { into_u8_slice(&self.ranges) }; - let len = ranges.len() as u64; - self.wtr.write_u64::(len)?; - self.wtr.write_all(ranges)?; - - let indexes = unsafe { into_u8_slice(&self.indexes) }; - let len = indexes.len() as u64; - self.wtr.write_u64::(len)?; - self.wtr.write_all(indexes)?; - - Ok(self.wtr) - } -} - -#[cfg(test)] -mod tests { - use std::error::Error; - use crate::DocumentId; - use super::*; - - #[test] - fn builder_serialize_deserialize() -> Result<(), Box> { - let a = DocIndex { - document_id: DocumentId(0), - attribute: 3, - word_index: 11, - char_index: 30, - char_length: 4, - }; - let b = DocIndex { - document_id: DocumentId(1), - attribute: 4, - word_index: 21, - char_index: 35, - char_length: 6, - }; - let c = DocIndex { - document_id: DocumentId(2), - attribute: 8, - word_index: 2, - char_index: 89, - char_length: 6, - }; - - let mut builder = DocIndexesBuilder::memory(); - - builder.insert(Set::new(&[a])?); - builder.insert(Set::new(&[a, b, c])?); - builder.insert(Set::new(&[a, c])?); - - let bytes = builder.into_inner()?; - let docs = DocIndexes::from_bytes(bytes)?; - - assert_eq!(docs.get(0), Some(Set::new(&[a])?)); - assert_eq!(docs.get(1), Some(Set::new(&[a, b, c])?)); - assert_eq!(docs.get(2), Some(Set::new(&[a, c])?)); - assert_eq!(docs.get(3), None); - - Ok(()) - } - - #[test] - fn serialize_deserialize() -> Result<(), Box> { - let a = DocIndex { - document_id: DocumentId(0), - attribute: 3, - word_index: 11, - char_index: 30, - char_length: 4, - }; - let b = DocIndex { - document_id: DocumentId(1), - attribute: 4, - word_index: 21, - char_index: 35, - char_length: 6, - }; - let c = DocIndex { - document_id: DocumentId(2), - attribute: 8, - word_index: 2, - char_index: 89, - char_length: 6, - }; - - let mut builder = DocIndexesBuilder::memory(); - - builder.insert(Set::new(&[a])?); - builder.insert(Set::new(&[a, b, c])?); - builder.insert(Set::new(&[a, c])?); - - let builder_bytes = builder.into_inner()?; - let docs = DocIndexes::from_bytes(builder_bytes.clone())?; - - let mut bytes = Vec::new(); - docs.write_to_bytes(&mut bytes); - - assert_eq!(builder_bytes, bytes); - - Ok(()) - } -} diff --git a/meilidb-core/src/data/mod.rs b/meilidb-core/src/data/mod.rs index 195a71cdc..cb708088d 100644 --- a/meilidb-core/src/data/mod.rs +++ b/meilidb-core/src/data/mod.rs @@ -1,14 +1,3 @@ -mod doc_indexes; mod shared_data; -use std::slice::from_raw_parts; -use std::mem::size_of; - -pub use self::doc_indexes::{DocIndexes, DocIndexesBuilder}; pub use self::shared_data::SharedData; - -unsafe fn into_u8_slice(slice: &[T]) -> &[u8] { - let ptr = slice.as_ptr() as *const u8; - let len = slice.len() * size_of::(); - from_raw_parts(ptr, len) -}