diff --git a/src/attribute.rs b/src/attribute.rs deleted file mode 100644 index 4c075e475..000000000 --- a/src/attribute.rs +++ /dev/null @@ -1,105 +0,0 @@ -use std::fmt; - -/// Represent an attribute number along with the word index -/// according to the tokenizer used. -/// -/// It can accept up to 1024 attributes and word positions -/// can be maximum 2^22. -#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Attribute(u32); - -impl Attribute { - /// Construct an `Attribute` from an attribute number and - /// the word position of a match according to the tokenizer used. - pub(crate) fn new(attribute: u16, index: u32) -> Result { - if attribute & 0b1111_1100_0000_0000 != 0 { - return Err(AttributeError::AttributeTooBig) - } - - if index & 0b1111_1111_1100_0000_0000_0000_0000 != 0 { - return Err(AttributeError::IndexTooBig) - } - - let attribute = u32::from(attribute) << 22; - Ok(Attribute(attribute | index)) - } - - /// Construct an `Attribute` from an attribute number and - /// the word position of a match according to the tokenizer used. - /// - /// # Panics - /// - /// The attribute must not be greater than 1024 - /// and the word index not greater than 2^22. - pub(crate) fn new_faillible(attribute: u16, index: u32) -> Attribute { - match Attribute::new(attribute, index) { - Ok(attribute) => attribute, - Err(AttributeError::AttributeTooBig) => { - panic!("attribute must not be greater than 1024") - }, - Err(AttributeError::IndexTooBig) => { - panic!("attribute word index must not be greater than 2^22") - }, - } - } - - pub(crate) fn max_value() -> Attribute { - Attribute(u32::max_value()) - } - - #[inline] - pub fn attribute(self) -> u16 { - (self.0 >> 22) as u16 - } - - #[inline] - pub fn word_index(self) -> u32 { - self.0 & 0b0000_0000_0011_1111_1111_1111_1111 - } -} - -impl fmt::Debug for Attribute { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_struct("Attribute") - .field("attribute", &self.attribute()) - .field("word_index", &self.word_index()) - .finish() - } -} - -pub enum AttributeError { - AttributeTooBig, - IndexTooBig, -} - -#[cfg(test)] -mod tests { - use super::*; - use quickcheck::{quickcheck, TestResult}; - - quickcheck! { - fn qc_attribute(gen_attr: u16, gen_index: u32) -> TestResult { - if gen_attr > 2_u16.pow(10) || gen_index > 2_u32.pow(22) { - return TestResult::discard() - } - - let attribute = Attribute::new_faillible(gen_attr, gen_index); - - let valid_attribute = attribute.attribute() == gen_attr; - let valid_index = attribute.word_index() == gen_index; - - TestResult::from_bool(valid_attribute && valid_index) - } - - fn qc_attribute_ord(gen_attr: u16, gen_index: u32) -> TestResult { - if gen_attr >= 2_u16.pow(10) || gen_index >= 2_u32.pow(22) { - return TestResult::discard() - } - - let a = Attribute::new_faillible(gen_attr, gen_index); - let b = Attribute::new_faillible(gen_attr + 1, gen_index + 1); - - TestResult::from_bool(a < b) - } - } -} diff --git a/src/data/doc_indexes.rs b/src/data/doc_indexes.rs index 4919b9fa0..67106a948 100644 --- a/src/data/doc_indexes.rs +++ b/src/data/doc_indexes.rs @@ -147,12 +147,9 @@ impl DocIndexesBuilder { #[cfg(test)] mod tests { - use super::*; - use std::error::Error; - use crate::{Attribute, WordArea}; - use crate::DocumentId; + use super::*; #[test] fn builder_serialize_deserialize() -> Result<(), Box> { diff --git a/src/database/serde/indexer_serializer.rs b/src/database/serde/indexer_serializer.rs index 6271e1b7b..63bb016d8 100644 --- a/src/database/serde/indexer_serializer.rs +++ b/src/database/serde/indexer_serializer.rs @@ -3,7 +3,7 @@ use crate::database::serde::SerializerError; use crate::database::schema::SchemaAttr; use crate::tokenizer::TokenizerBuilder; use crate::tokenizer::Token; -use crate::{DocumentId, DocIndex, Attribute, WordArea}; +use crate::{DocumentId, DocIndex}; use hashbrown::HashSet; use serde::Serialize; diff --git a/src/lib.rs b/src/lib.rs index 5f824b39a..bfa0b3cd9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,16 +5,12 @@ pub mod database; pub mod data; pub mod rank; pub mod tokenizer; -mod attribute; -mod word_area; mod common_words; pub use rocksdb; pub use self::tokenizer::Tokenizer; pub use self::common_words::CommonWords; -pub use self::attribute::{Attribute, AttributeError}; -pub use self::word_area::{WordArea, WordAreaError}; /// Represent an internally generated document unique identifier. /// diff --git a/src/rank/criterion/document_id.rs b/src/rank/criterion/document_id.rs index a388cf2de..8e4cf91b5 100644 --- a/src/rank/criterion/document_id.rs +++ b/src/rank/criterion/document_id.rs @@ -1,5 +1,4 @@ use std::cmp::Ordering; -use std::ops::Deref; use crate::rank::criterion::Criterion; use crate::rank::RawDocument; diff --git a/src/rank/criterion/mod.rs b/src/rank/criterion/mod.rs index 46b41ea0f..6272cf89d 100644 --- a/src/rank/criterion/mod.rs +++ b/src/rank/criterion/mod.rs @@ -8,8 +8,6 @@ mod exact; mod document_id; use std::cmp::Ordering; - -use crate::database::DatabaseView; use crate::rank::RawDocument; pub use self::{ diff --git a/src/rank/criterion/words_proximity.rs b/src/rank/criterion/words_proximity.rs index 6f101d4d0..b5d98e147 100644 --- a/src/rank/criterion/words_proximity.rs +++ b/src/rank/criterion/words_proximity.rs @@ -94,8 +94,6 @@ impl Criterion for WordsProximity { mod tests { use super::*; - use crate::Attribute; - #[test] fn three_different_attributes() { diff --git a/src/rank/query_builder.rs b/src/rank/query_builder.rs index ff160da7f..e6c49be6d 100644 --- a/src/rank/query_builder.rs +++ b/src/rank/query_builder.rs @@ -95,7 +95,6 @@ where D: Deref, op_builder.union() }; - let mut number_matches = 0; let mut matches = Vec::new(); while let Some((input, indexed_values)) = stream.next() { @@ -148,7 +147,6 @@ where D: Deref, info!("query_all took {}", elapsed); let mut groups = vec![documents.as_mut_slice()]; - let view = &self.view; 'criteria: for (ci, criterion) in self.criteria.as_ref().iter().enumerate() { let tmp_groups = mem::replace(&mut groups, Vec::new()); diff --git a/src/word_area.rs b/src/word_area.rs deleted file mode 100644 index 593b462a6..000000000 --- a/src/word_area.rs +++ /dev/null @@ -1,102 +0,0 @@ -use std::fmt; - -/// Represent a word position in bytes along with the length of it. -/// -/// It can represent words byte index to maximum 2^22 and -/// up to words of length 1024. -#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct WordArea(u32); - -impl WordArea { - /// Construct a `WordArea` from a word position in expresed as - /// a number of characters and the length of it. - /// - /// # Panics - /// - /// The char index must not be greater than 2^22 - /// and the length not greater than 1024. - pub(crate) fn new(char_index: u32, length: u16) -> Result { - if char_index & 0b1111_1111_1100_0000_0000_0000_0000 != 0 { - return Err(WordAreaError::ByteIndexTooBig) - } - - if length & 0b1111_1100_0000_0000 != 0 { - return Err(WordAreaError::LengthTooBig) - } - - let char_index = char_index << 10; - Ok(WordArea(char_index | u32::from(length))) - } - - pub(crate) fn new_faillible(char_index: u32, length: u16) -> WordArea { - match WordArea::new(char_index, length) { - Ok(word_area) => word_area, - Err(WordAreaError::ByteIndexTooBig) => { - panic!("word area byte index must not be greater than 2^22") - }, - Err(WordAreaError::LengthTooBig) => { - panic!("word area length must not be greater than 1024") - }, - } - } - - pub(crate) fn max_value() -> WordArea { - WordArea(u32::max_value()) - } - - #[inline] - pub fn char_index(self) -> u32 { - self.0 >> 10 - } - - #[inline] - pub fn length(self) -> u16 { - (self.0 & 0b0000_0000_0000_0000_0011_1111_1111) as u16 - } -} - -impl fmt::Debug for WordArea { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_struct("WordArea") - .field("char_index", &self.char_index()) - .field("length", &self.length()) - .finish() - } -} - -pub enum WordAreaError { - ByteIndexTooBig, - LengthTooBig, -} - -#[cfg(test)] -mod tests { - use super::*; - use quickcheck::{quickcheck, TestResult}; - - quickcheck! { - fn qc_word_area(gen_char_index: u32, gen_length: u16) -> TestResult { - if gen_char_index > 2_u32.pow(22) || gen_length > 2_u16.pow(10) { - return TestResult::discard() - } - - let word_area = WordArea::new_faillible(gen_char_index, gen_length); - - let valid_char_index = word_area.char_index() == gen_char_index; - let valid_length = word_area.length() == gen_length; - - TestResult::from_bool(valid_char_index && valid_length) - } - - fn qc_word_area_ord(gen_char_index: u32, gen_length: u16) -> TestResult { - if gen_char_index >= 2_u32.pow(22) || gen_length >= 2_u16.pow(10) { - return TestResult::discard() - } - - let a = WordArea::new_faillible(gen_char_index, gen_length); - let b = WordArea::new_faillible(gen_char_index + 1, gen_length + 1); - - TestResult::from_bool(a < b) - } - } -}