Merge pull request #51 from Kerollmops/wordarea-attribute-fallible

Make the Attribute and WordArea errors recoverable
This commit is contained in:
Clément Renault 2018-12-28 18:26:19 +01:00 committed by GitHub
commit 70772eca5c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 208 additions and 51 deletions

View File

@ -164,9 +164,21 @@ mod tests {
#[test] #[test]
fn builder_serialize_deserialize() -> Result<(), Box<Error>> { fn builder_serialize_deserialize() -> Result<(), Box<Error>> {
let a = DocIndex { document_id: DocumentId(0), attribute: Attribute::new(3, 11), word_area: WordArea::new(30, 4) }; let a = DocIndex {
let b = DocIndex { document_id: DocumentId(1), attribute: Attribute::new(4, 21), word_area: WordArea::new(35, 6) }; document_id: DocumentId(0),
let c = DocIndex { document_id: DocumentId(2), attribute: Attribute::new(8, 2), word_area: WordArea::new(89, 6) }; attribute: Attribute::new_faillible(3, 11),
word_area: WordArea::new_faillible(30, 4)
};
let b = DocIndex {
document_id: DocumentId(1),
attribute: Attribute::new_faillible(4, 21),
word_area: WordArea::new_faillible(35, 6)
};
let c = DocIndex {
document_id: DocumentId(2),
attribute: Attribute::new_faillible(8, 2),
word_area: WordArea::new_faillible(89, 6)
};
let mut builder = DocIndexesBuilder::memory(); let mut builder = DocIndexesBuilder::memory();
@ -187,9 +199,21 @@ mod tests {
#[test] #[test]
fn serialize_deserialize() -> Result<(), Box<Error>> { fn serialize_deserialize() -> Result<(), Box<Error>> {
let a = DocIndex { document_id: DocumentId(0), attribute: Attribute::new(3, 11), word_area: WordArea::new(30, 4) }; let a = DocIndex {
let b = DocIndex { document_id: DocumentId(1), attribute: Attribute::new(4, 21), word_area: WordArea::new(35, 6) }; document_id: DocumentId(0),
let c = DocIndex { document_id: DocumentId(2), attribute: Attribute::new(8, 2), word_area: WordArea::new(89, 6) }; attribute: Attribute::new_faillible(3, 11),
word_area: WordArea::new_faillible(30, 4)
};
let b = DocIndex {
document_id: DocumentId(1),
attribute: Attribute::new_faillible(4, 21),
word_area: WordArea::new_faillible(35, 6)
};
let c = DocIndex {
document_id: DocumentId(2),
attribute: Attribute::new_faillible(8, 2),
word_area: WordArea::new_faillible(89, 6)
};
let mut builder = DocIndexesBuilder::memory(); let mut builder = DocIndexesBuilder::memory();

View File

@ -209,9 +209,21 @@ mod tests {
#[test] #[test]
fn serialize_deserialize() -> Result<(), Box<Error>> { fn serialize_deserialize() -> Result<(), Box<Error>> {
let a = DocIndex { document_id: DocumentId(0), attribute: Attribute::new(3, 11), word_area: WordArea::new(30, 4) }; let a = DocIndex {
let b = DocIndex { document_id: DocumentId(1), attribute: Attribute::new(4, 21), word_area: WordArea::new(35, 6) }; document_id: DocumentId(0),
let c = DocIndex { document_id: DocumentId(2), attribute: Attribute::new(8, 2), word_area: WordArea::new(89, 6) }; attribute: Attribute::new_faillible(3, 11),
word_area: WordArea::new_faillible(30, 4)
};
let b = DocIndex {
document_id: DocumentId(1),
attribute: Attribute::new_faillible(4, 21),
word_area: WordArea::new_faillible(35, 6)
};
let c = DocIndex {
document_id: DocumentId(2),
attribute: Attribute::new_faillible(8, 2),
word_area: WordArea::new_faillible(89, 6)
};
let mut builder = PositiveBlobBuilder::memory(); let mut builder = PositiveBlobBuilder::memory();
@ -232,9 +244,21 @@ mod tests {
#[test] #[test]
fn serde_serialize_deserialize() -> Result<(), Box<Error>> { fn serde_serialize_deserialize() -> Result<(), Box<Error>> {
let a = DocIndex { document_id: DocumentId(0), attribute: Attribute::new(3, 11), word_area: WordArea::new(30, 4) }; let a = DocIndex {
let b = DocIndex { document_id: DocumentId(1), attribute: Attribute::new(4, 21), word_area: WordArea::new(35, 6) }; document_id: DocumentId(0),
let c = DocIndex { document_id: DocumentId(2), attribute: Attribute::new(8, 2), word_area: WordArea::new(89, 6) }; attribute: Attribute::new_faillible(3, 11),
word_area: WordArea::new_faillible(30, 4)
};
let b = DocIndex {
document_id: DocumentId(1),
attribute: Attribute::new_faillible(4, 21),
word_area: WordArea::new_faillible(35, 6)
};
let c = DocIndex {
document_id: DocumentId(2),
attribute: Attribute::new_faillible(8, 2),
word_area: WordArea::new_faillible(89, 6)
};
let mut builder = PositiveBlobBuilder::memory(); let mut builder = PositiveBlobBuilder::memory();

View File

@ -348,8 +348,8 @@ where B: TokenizerBuilder
for Token { word, word_index, char_index } in self.tokenizer_builder.build(v) { for Token { word, word_index, char_index } in self.tokenizer_builder.build(v) {
let doc_index = DocIndex { let doc_index = DocIndex {
document_id: self.document_id, document_id: self.document_id,
attribute: Attribute::new(self.attribute.0, word_index as u32), attribute: Attribute::new_faillible(self.attribute.0, word_index as u32),
word_area: WordArea::new(char_index as u32, word.len() as u16), word_area: WordArea::new_faillible(char_index as u32, word.len() as u16),
}; };
// insert the exact representation // insert the exact representation

View File

@ -29,6 +29,21 @@ pub struct DocumentId(u64);
pub struct Attribute(u32); pub struct Attribute(u32);
impl Attribute { impl Attribute {
/// Construct an `Attribute` from an attribute number and
/// the word position of a match according to the tokenizer used.
///
/// The attribute number is stored in the 10 high bits and the
/// word index in the 22 low bits of the underlying `u32`.
///
/// # Errors
///
/// Returns `AttributeError::AttributeTooBig` when `attribute` does not
/// fit in 10 bits (1024 or more) and `AttributeError::IndexTooBig`
/// when `index` does not fit in 22 bits (2^22 or more).
/// The attribute number is checked first.
fn new(attribute: u16, index: u32) -> Result<Attribute, AttributeError> {
    // Width of each packed field; together they fill the whole `u32`.
    const ATTRIBUTE_BITS: u32 = 10;
    const INDEX_BITS: u32 = 22;

    if (u32::from(attribute) >> ATTRIBUTE_BITS) != 0 {
        return Err(AttributeError::AttributeTooBig)
    }

    // Shifting instead of masking with a hand-written binary literal:
    // the former 28-bit literal tested bits 18..=27, which rejected valid
    // indexes and let bits 28..=31 leak into the attribute field.
    if (index >> INDEX_BITS) != 0 {
        return Err(AttributeError::IndexTooBig)
    }

    Ok(Attribute((u32::from(attribute) << INDEX_BITS) | index))
}
/// Construct an `Attribute` from an attribute number and /// Construct an `Attribute` from an attribute number and
/// the word position of a match according to the tokenizer used. /// the word position of a match according to the tokenizer used.
/// ///
@ -36,12 +51,16 @@ impl Attribute {
/// ///
/// The attribute must not be greater than 1024 /// The attribute must not be greater than 1024
/// and the word index not greater than 2^22. /// and the word index not greater than 2^22.
// Panicking constructor: the added side delegates to `Attribute::new` and
// turns each `AttributeError` variant into a panic with its own message.
// NOTE(review): despite the `new_faillible` name this is the variant that
// panics; `Attribute::new` is the one that returns a `Result`.
// NOTE(review): the messages say "not greater than", but the checks in
// `Attribute::new` also reject the limit itself (e.g. attribute == 1024).
fn new(attribute: u16, index: u32) -> Attribute { fn new_faillible(attribute: u16, index: u32) -> Attribute {
assert!(attribute & 0b1111_1100_0000_0000 == 0); match Attribute::new(attribute, index) {
assert!(index & 0b1111_1111_1100_0000_0000_0000_0000 == 0); Ok(attribute) => attribute,
Err(AttributeError::AttributeTooBig) => {
let attribute = (attribute as u32) << 22; panic!("attribute must not be greater than 1024")
Attribute(attribute | index) },
Err(AttributeError::IndexTooBig) => {
panic!("attribute word index must not be greater than 2^22")
},
}
} }
pub fn attribute(&self) -> u16 { pub fn attribute(&self) -> u16 {
@ -62,6 +81,11 @@ impl fmt::Debug for Attribute {
} }
} }
/// Reasons why an `Attribute` cannot be built from an
/// attribute number and a word index.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum AttributeError {
    /// The attribute number is too large to be stored (limit: 1024).
    AttributeTooBig,
    /// The word index is too large to be stored (limit: 2^22).
    IndexTooBig,
}

impl fmt::Display for AttributeError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match *self {
            AttributeError::AttributeTooBig => "attribute must be lower than 1024",
            AttributeError::IndexTooBig => "attribute word index must be lower than 2^22",
        };
        f.write_str(message)
    }
}

// Lets callers propagate this error with `?` into a boxed `Error`.
impl ::std::error::Error for AttributeError {}
/// Represent a word position in bytes along with the length of it. /// Represent a word position in bytes along with the length of it.
/// ///
/// It can represent words byte index to maximum 2^22 and /// It can represent words byte index to maximum 2^22 and
@ -77,12 +101,32 @@ impl WordArea {
/// ///
/// The byte index must not be greater than 2^22 /// The byte index must not be greater than 2^22
/// and the length not greater than 1024. /// and the length not greater than 1024.
fn new(byte_index: u32, length: u16) -> WordArea { fn new(byte_index: u32, length: u16) -> Result<WordArea, WordAreaError> {
assert!(byte_index & 0b1111_1111_1100_0000_0000_0000_0000 == 0); assert!(byte_index & 0b1111_1111_1100_0000_0000_0000_0000 == 0);
assert!(length & 0b1111_1100_0000_0000 == 0); assert!(length & 0b1111_1100_0000_0000 == 0);
if byte_index & 0b1111_1111_1100_0000_0000_0000_0000 != 0 {
return Err(WordAreaError::ByteIndexTooBig)
}
if length & 0b1111_1100_0000_0000 != 0 {
return Err(WordAreaError::LengthTooBig)
}
let byte_index = byte_index << 10; let byte_index = byte_index << 10;
WordArea(byte_index | (length as u32)) Ok(WordArea(byte_index | (length as u32)))
}
/// Construct a `WordArea` from a byte index and a length,
/// panicking instead of returning an error.
///
/// # Panics
///
/// Panics with a dedicated message when `WordArea::new` returns
/// `WordAreaError::ByteIndexTooBig` or `WordAreaError::LengthTooBig`.
// NOTE(review): despite the `new_faillible` name this is the variant that
// panics; `WordArea::new` is the one that returns a `Result`.
fn new_faillible(byte_index: u32, length: u16) -> WordArea {
match WordArea::new(byte_index, length) {
Ok(word_area) => word_area,
Err(WordAreaError::ByteIndexTooBig) => {
panic!("word area byte index must not be greater than 2^22")
},
Err(WordAreaError::LengthTooBig) => {
panic!("word area length must not be greater than 1024")
},
}
} }
pub fn byte_index(&self) -> u32 { pub fn byte_index(&self) -> u32 {
@ -103,6 +147,11 @@ impl fmt::Debug for WordArea {
} }
} }
/// Reasons why a `WordArea` cannot be built from a
/// byte index and a length.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WordAreaError {
    /// The byte index is too large to be stored (limit: 2^22).
    ByteIndexTooBig,
    /// The length is too large to be stored (limit: 1024).
    LengthTooBig,
}

impl fmt::Display for WordAreaError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match *self {
            WordAreaError::ByteIndexTooBig => "word area byte index must be lower than 2^22",
            WordAreaError::LengthTooBig => "word area length must be lower than 1024",
        };
        f.write_str(message)
    }
}

// Lets callers propagate this error with `?` into a boxed `Error`.
impl ::std::error::Error for WordAreaError {}
/// This structure represent the position of a word /// This structure represent the position of a word
/// in a document and its attributes. /// in a document and its attributes.
/// ///
@ -166,9 +215,9 @@ impl Match {
Match { Match {
query_index: 0, query_index: 0,
distance: 0, distance: 0,
attribute: Attribute::new(0, 0), attribute: Attribute::new_faillible(0, 0),
is_exact: false, is_exact: false,
word_area: WordArea::new(0, 0), word_area: WordArea::new_faillible(0, 0),
} }
} }
@ -200,7 +249,7 @@ mod tests {
return TestResult::discard() return TestResult::discard()
} }
let attribute = Attribute::new(gen_attr, gen_index); let attribute = Attribute::new_faillible(gen_attr, gen_index);
let valid_attribute = attribute.attribute() == gen_attr; let valid_attribute = attribute.attribute() == gen_attr;
let valid_index = attribute.word_index() == gen_index; let valid_index = attribute.word_index() == gen_index;
@ -213,8 +262,8 @@ mod tests {
return TestResult::discard() return TestResult::discard()
} }
let a = Attribute::new(gen_attr, gen_index); let a = Attribute::new_faillible(gen_attr, gen_index);
let b = Attribute::new(gen_attr + 1, gen_index + 1); let b = Attribute::new_faillible(gen_attr + 1, gen_index + 1);
TestResult::from_bool(a < b) TestResult::from_bool(a < b)
} }
@ -224,7 +273,7 @@ mod tests {
return TestResult::discard() return TestResult::discard()
} }
let word_area = WordArea::new(gen_byte_index, gen_length); let word_area = WordArea::new_faillible(gen_byte_index, gen_length);
let valid_char_index = word_area.byte_index() == gen_byte_index; let valid_char_index = word_area.byte_index() == gen_byte_index;
let valid_length = word_area.length() == gen_length; let valid_length = word_area.length() == gen_length;
@ -237,8 +286,8 @@ mod tests {
return TestResult::discard() return TestResult::discard()
} }
let a = WordArea::new(gen_byte_index, gen_length); let a = WordArea::new_faillible(gen_byte_index, gen_length);
let b = WordArea::new(gen_byte_index + 1, gen_length + 1); let b = WordArea::new_faillible(gen_byte_index + 1, gen_length + 1);
TestResult::from_bool(a < b) TestResult::from_bool(a < b)
} }

View File

@ -54,8 +54,20 @@ mod tests {
fn one_typo_reference() { fn one_typo_reference() {
let doc0 = { let doc0 = {
let matches = vec![ let matches = vec![
Match { query_index: 0, distance: 0, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) }, Match {
Match { query_index: 1, distance: 0, attribute: Attribute::new(0, 2), is_exact: false, word_area: WordArea::new(0, 6) }, query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
Match {
query_index: 1,
distance: 0,
attribute: Attribute::new_faillible(0, 2),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
]; ];
Document { Document {
id: DocumentId(0), id: DocumentId(0),
@ -65,8 +77,20 @@ mod tests {
let doc1 = { let doc1 = {
let matches = vec![ let matches = vec![
Match { query_index: 0, distance: 1, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) }, Match {
Match { query_index: 1, distance: 0, attribute: Attribute::new(0, 2), is_exact: false, word_area: WordArea::new(0, 6) }, query_index: 0,
distance: 1,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
Match {
query_index: 1,
distance: 0,
attribute: Attribute::new_faillible(0, 2),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
]; ];
Document { Document {
id: DocumentId(1), id: DocumentId(1),
@ -87,8 +111,20 @@ mod tests {
fn no_typo() { fn no_typo() {
let doc0 = { let doc0 = {
let matches = vec![ let matches = vec![
Match { query_index: 0, distance: 0, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) }, Match {
Match { query_index: 1, distance: 0, attribute: Attribute::new(0, 1), is_exact: false, word_area: WordArea::new(0, 6) }, query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
Match {
query_index: 1,
distance: 0,
attribute: Attribute::new_faillible(0, 1),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
]; ];
Document { Document {
id: DocumentId(0), id: DocumentId(0),
@ -98,7 +134,13 @@ mod tests {
let doc1 = { let doc1 = {
let matches = vec![ let matches = vec![
Match { query_index: 0, distance: 0, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) }, Match {
query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
]; ];
Document { Document {
id: DocumentId(1), id: DocumentId(1),
@ -119,8 +161,20 @@ mod tests {
fn one_typo() { fn one_typo() {
let doc0 = { let doc0 = {
let matches = vec![ let matches = vec![
Match { query_index: 0, distance: 0, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) }, Match {
Match { query_index: 1, distance: 1, attribute: Attribute::new(0, 1), is_exact: false, word_area: WordArea::new(0, 6) }, query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
Match {
query_index: 1,
distance: 1,
attribute: Attribute::new_faillible(0, 1),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
]; ];
Document { Document {
id: DocumentId(0), id: DocumentId(0),
@ -130,7 +184,13 @@ mod tests {
let doc1 = { let doc1 = {
let matches = vec![ let matches = vec![
Match { query_index: 0, distance: 0, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) }, Match {
query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
]; ];
Document { Document {
id: DocumentId(1), id: DocumentId(1),

View File

@ -81,11 +81,11 @@ mod tests {
// { id: 3, attr: 3, attr_index: 1 } // { id: 3, attr: 3, attr_index: 1 }
let matches = &[ let matches = &[
Match { query_index: 0, attribute: Attribute::new(0, 0), ..Match::zero() }, Match { query_index: 0, attribute: Attribute::new_faillible(0, 0), ..Match::zero() },
Match { query_index: 1, attribute: Attribute::new(1, 0), ..Match::zero() }, Match { query_index: 1, attribute: Attribute::new_faillible(1, 0), ..Match::zero() },
Match { query_index: 2, attribute: Attribute::new(1, 1), ..Match::zero() }, Match { query_index: 2, attribute: Attribute::new_faillible(1, 1), ..Match::zero() },
Match { query_index: 2, attribute: Attribute::new(2, 0), ..Match::zero() }, Match { query_index: 2, attribute: Attribute::new_faillible(2, 0), ..Match::zero() },
Match { query_index: 3, attribute: Attribute::new(3, 1), ..Match::zero() }, Match { query_index: 3, attribute: Attribute::new_faillible(3, 1), ..Match::zero() },
]; ];
// soup -> of = 8 // soup -> of = 8
@ -107,12 +107,12 @@ mod tests {
// { id: 3, attr: 1, attr_index: 3 } // { id: 3, attr: 1, attr_index: 3 }
let matches = &[ let matches = &[
Match { query_index: 0, attribute: Attribute::new(0, 0), ..Match::zero() }, Match { query_index: 0, attribute: Attribute::new_faillible(0, 0), ..Match::zero() },
Match { query_index: 0, attribute: Attribute::new(1, 0), ..Match::zero() }, Match { query_index: 0, attribute: Attribute::new_faillible(1, 0), ..Match::zero() },
Match { query_index: 1, attribute: Attribute::new(1, 1), ..Match::zero() }, Match { query_index: 1, attribute: Attribute::new_faillible(1, 1), ..Match::zero() },
Match { query_index: 2, attribute: Attribute::new(1, 2), ..Match::zero() }, Match { query_index: 2, attribute: Attribute::new_faillible(1, 2), ..Match::zero() },
Match { query_index: 3, attribute: Attribute::new(0, 1), ..Match::zero() }, Match { query_index: 3, attribute: Attribute::new_faillible(0, 1), ..Match::zero() },
Match { query_index: 3, attribute: Attribute::new(1, 3), ..Match::zero() }, Match { query_index: 3, attribute: Attribute::new_faillible(1, 3), ..Match::zero() },
]; ];
// soup -> of = 1 // soup -> of = 1