use std::collections::BTreeSet; use std::slice::from_raw_parts; use std::error::Error; use std::path::Path; use std::sync::Arc; use std::{io, mem}; use byteorder::{NativeEndian, WriteBytesExt}; use fst::raw::MmapReadOnly; use serde::ser::{Serialize, Serializer}; use crate::DocumentId; use crate::data::Data; #[derive(Clone)] pub struct DocIds { data: Data, } impl DocIds { pub unsafe fn from_path>(path: P) -> io::Result { let mmap = MmapReadOnly::open_path(path)?; let data = Data::Mmap(mmap); Ok(DocIds { data }) } pub fn from_bytes(vec: Vec) -> Result> { // FIXME check if modulo DocumentId let len = vec.len(); let data = Data::Shared { vec: Arc::new(vec), offset: 0, len: len }; Ok(DocIds { data }) } pub fn contains(&self, doc: DocumentId) -> bool { // FIXME prefer using the sdset::exponential_search function self.doc_ids().binary_search(&doc).is_ok() } pub fn doc_ids(&self) -> &[DocumentId] { let slice = &self.data; let ptr = slice.as_ptr() as *const DocumentId; let len = slice.len() / mem::size_of::(); unsafe { from_raw_parts(ptr, len) } } } impl Serialize for DocIds { fn serialize(&self, serializer: S) -> Result { self.data.as_ref().serialize(serializer) } } pub struct DocIdsBuilder { doc_ids: BTreeSet, // TODO: prefer a linked-list wrt: W, } impl DocIdsBuilder { pub fn new(wrt: W) -> Self { Self { doc_ids: BTreeSet::new(), wrt: wrt, } } pub fn insert(&mut self, doc: DocumentId) -> bool { self.doc_ids.insert(doc) } pub fn into_inner(mut self) -> io::Result { for id in self.doc_ids { self.wrt.write_u64::(id)?; } Ok(self.wrt) } }