2018-11-08 12:05:59 +01:00
|
|
|
use std::slice::from_raw_parts;
|
|
|
|
use std::error::Error;
|
|
|
|
use std::path::Path;
|
|
|
|
use std::sync::Arc;
|
|
|
|
use std::{io, mem};
|
|
|
|
|
|
|
|
use fst::raw::MmapReadOnly;
|
2018-11-23 18:00:24 +01:00
|
|
|
use serde::ser::{Serialize, Serializer};
|
2018-11-08 12:05:59 +01:00
|
|
|
|
|
|
|
use crate::DocumentId;
|
|
|
|
use crate::data::Data;
|
|
|
|
|
2018-12-01 18:37:21 +01:00
|
|
|
#[derive(Default, Clone)]
|
2018-11-08 12:05:59 +01:00
|
|
|
pub struct DocIds {
|
2018-11-23 18:00:24 +01:00
|
|
|
data: Data,
|
2018-11-08 12:05:59 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
impl DocIds {
|
|
|
|
pub unsafe fn from_path<P: AsRef<Path>>(path: P) -> io::Result<Self> {
|
|
|
|
let mmap = MmapReadOnly::open_path(path)?;
|
2018-11-23 18:00:24 +01:00
|
|
|
let data = Data::Mmap(mmap);
|
|
|
|
Ok(DocIds { data })
|
2018-11-08 12:05:59 +01:00
|
|
|
}
|
|
|
|
|
2018-11-22 18:28:11 +01:00
|
|
|
pub fn from_bytes(vec: Vec<u8>) -> Result<Self, Box<Error>> {
|
|
|
|
// FIXME check if modulo DocumentId
|
2018-11-08 12:05:59 +01:00
|
|
|
let len = vec.len();
|
2018-11-23 18:00:24 +01:00
|
|
|
let data = Data::Shared {
|
2018-11-28 17:12:24 +01:00
|
|
|
bytes: Arc::new(vec),
|
2018-11-08 12:05:59 +01:00
|
|
|
offset: 0,
|
|
|
|
len: len
|
|
|
|
};
|
2018-11-23 18:00:24 +01:00
|
|
|
Ok(DocIds { data })
|
2018-11-08 12:05:59 +01:00
|
|
|
}
|
|
|
|
|
2018-11-26 17:30:19 +01:00
|
|
|
pub fn from_document_ids(vec: Vec<DocumentId>) -> Self {
|
|
|
|
DocIds::from_bytes(unsafe { mem::transmute(vec) }).unwrap()
|
|
|
|
}
|
|
|
|
|
2018-11-08 12:05:59 +01:00
|
|
|
pub fn contains(&self, doc: DocumentId) -> bool {
|
|
|
|
// FIXME prefer using the sdset::exponential_search function
|
|
|
|
self.doc_ids().binary_search(&doc).is_ok()
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn doc_ids(&self) -> &[DocumentId] {
|
2018-11-23 18:00:24 +01:00
|
|
|
let slice = &self.data;
|
2018-11-08 12:05:59 +01:00
|
|
|
let ptr = slice.as_ptr() as *const DocumentId;
|
|
|
|
let len = slice.len() / mem::size_of::<DocumentId>();
|
|
|
|
unsafe { from_raw_parts(ptr, len) }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-23 18:00:24 +01:00
|
|
|
impl Serialize for DocIds {
|
|
|
|
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
|
|
|
|
self.data.as_ref().serialize(serializer)
|
|
|
|
}
|
|
|
|
}
|