mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-27 13:47:29 +01:00
feat: Make the Attribute and WordArea errors recoverable
This commit is contained in:
parent
e3bfb866e5
commit
b27f632e14
@ -164,9 +164,21 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn builder_serialize_deserialize() -> Result<(), Box<Error>> {
|
||||
let a = DocIndex { document_id: DocumentId(0), attribute: Attribute::new(3, 11), word_area: WordArea::new(30, 4) };
|
||||
let b = DocIndex { document_id: DocumentId(1), attribute: Attribute::new(4, 21), word_area: WordArea::new(35, 6) };
|
||||
let c = DocIndex { document_id: DocumentId(2), attribute: Attribute::new(8, 2), word_area: WordArea::new(89, 6) };
|
||||
let a = DocIndex {
|
||||
document_id: DocumentId(0),
|
||||
attribute: Attribute::new_faillible(3, 11),
|
||||
word_area: WordArea::new_faillible(30, 4)
|
||||
};
|
||||
let b = DocIndex {
|
||||
document_id: DocumentId(1),
|
||||
attribute: Attribute::new_faillible(4, 21),
|
||||
word_area: WordArea::new_faillible(35, 6)
|
||||
};
|
||||
let c = DocIndex {
|
||||
document_id: DocumentId(2),
|
||||
attribute: Attribute::new_faillible(8, 2),
|
||||
word_area: WordArea::new_faillible(89, 6)
|
||||
};
|
||||
|
||||
let mut builder = DocIndexesBuilder::memory();
|
||||
|
||||
@ -187,9 +199,21 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn serialize_deserialize() -> Result<(), Box<Error>> {
|
||||
let a = DocIndex { document_id: DocumentId(0), attribute: Attribute::new(3, 11), word_area: WordArea::new(30, 4) };
|
||||
let b = DocIndex { document_id: DocumentId(1), attribute: Attribute::new(4, 21), word_area: WordArea::new(35, 6) };
|
||||
let c = DocIndex { document_id: DocumentId(2), attribute: Attribute::new(8, 2), word_area: WordArea::new(89, 6) };
|
||||
let a = DocIndex {
|
||||
document_id: DocumentId(0),
|
||||
attribute: Attribute::new_faillible(3, 11),
|
||||
word_area: WordArea::new_faillible(30, 4)
|
||||
};
|
||||
let b = DocIndex {
|
||||
document_id: DocumentId(1),
|
||||
attribute: Attribute::new_faillible(4, 21),
|
||||
word_area: WordArea::new_faillible(35, 6)
|
||||
};
|
||||
let c = DocIndex {
|
||||
document_id: DocumentId(2),
|
||||
attribute: Attribute::new_faillible(8, 2),
|
||||
word_area: WordArea::new_faillible(89, 6)
|
||||
};
|
||||
|
||||
let mut builder = DocIndexesBuilder::memory();
|
||||
|
||||
|
@ -209,9 +209,21 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn serialize_deserialize() -> Result<(), Box<Error>> {
|
||||
let a = DocIndex { document_id: DocumentId(0), attribute: Attribute::new(3, 11), word_area: WordArea::new(30, 4) };
|
||||
let b = DocIndex { document_id: DocumentId(1), attribute: Attribute::new(4, 21), word_area: WordArea::new(35, 6) };
|
||||
let c = DocIndex { document_id: DocumentId(2), attribute: Attribute::new(8, 2), word_area: WordArea::new(89, 6) };
|
||||
let a = DocIndex {
|
||||
document_id: DocumentId(0),
|
||||
attribute: Attribute::new_faillible(3, 11),
|
||||
word_area: WordArea::new_faillible(30, 4)
|
||||
};
|
||||
let b = DocIndex {
|
||||
document_id: DocumentId(1),
|
||||
attribute: Attribute::new_faillible(4, 21),
|
||||
word_area: WordArea::new_faillible(35, 6)
|
||||
};
|
||||
let c = DocIndex {
|
||||
document_id: DocumentId(2),
|
||||
attribute: Attribute::new_faillible(8, 2),
|
||||
word_area: WordArea::new_faillible(89, 6)
|
||||
};
|
||||
|
||||
let mut builder = PositiveBlobBuilder::memory();
|
||||
|
||||
@ -232,9 +244,21 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn serde_serialize_deserialize() -> Result<(), Box<Error>> {
|
||||
let a = DocIndex { document_id: DocumentId(0), attribute: Attribute::new(3, 11), word_area: WordArea::new(30, 4) };
|
||||
let b = DocIndex { document_id: DocumentId(1), attribute: Attribute::new(4, 21), word_area: WordArea::new(35, 6) };
|
||||
let c = DocIndex { document_id: DocumentId(2), attribute: Attribute::new(8, 2), word_area: WordArea::new(89, 6) };
|
||||
let a = DocIndex {
|
||||
document_id: DocumentId(0),
|
||||
attribute: Attribute::new_faillible(3, 11),
|
||||
word_area: WordArea::new_faillible(30, 4)
|
||||
};
|
||||
let b = DocIndex {
|
||||
document_id: DocumentId(1),
|
||||
attribute: Attribute::new_faillible(4, 21),
|
||||
word_area: WordArea::new_faillible(35, 6)
|
||||
};
|
||||
let c = DocIndex {
|
||||
document_id: DocumentId(2),
|
||||
attribute: Attribute::new_faillible(8, 2),
|
||||
word_area: WordArea::new_faillible(89, 6)
|
||||
};
|
||||
|
||||
let mut builder = PositiveBlobBuilder::memory();
|
||||
|
||||
|
@ -348,8 +348,8 @@ where B: TokenizerBuilder
|
||||
for Token { word, word_index, char_index } in self.tokenizer_builder.build(v) {
|
||||
let doc_index = DocIndex {
|
||||
document_id: self.document_id,
|
||||
attribute: Attribute::new(self.attribute.0, word_index as u32),
|
||||
word_area: WordArea::new(char_index as u32, word.len() as u16),
|
||||
attribute: Attribute::new_faillible(self.attribute.0, word_index as u32),
|
||||
word_area: WordArea::new_faillible(char_index as u32, word.len() as u16),
|
||||
};
|
||||
|
||||
// insert the exact representation
|
||||
|
81
src/lib.rs
81
src/lib.rs
@ -29,6 +29,21 @@ pub struct DocumentId(u64);
|
||||
pub struct Attribute(u32);
|
||||
|
||||
impl Attribute {
|
||||
/// Construct an `Attribute` from an attribute number and
|
||||
/// the word position of a match according to the tokenizer used.
|
||||
fn new(attribute: u16, index: u32) -> Result<Attribute, AttributeError> {
|
||||
if attribute & 0b1111_1100_0000_0000 != 0 {
|
||||
return Err(AttributeError::AttributeTooBig)
|
||||
}
|
||||
|
||||
if index & 0b1111_1111_1100_0000_0000_0000_0000 != 0 {
|
||||
return Err(AttributeError::IndexTooBig)
|
||||
}
|
||||
|
||||
let attribute = (attribute as u32) << 22;
|
||||
Ok(Attribute(attribute | index))
|
||||
}
|
||||
|
||||
/// Construct an `Attribute` from an attribute number and
|
||||
/// the word position of a match according to the tokenizer used.
|
||||
///
|
||||
@ -36,12 +51,16 @@ impl Attribute {
|
||||
///
|
||||
/// The attribute must be lower than 1024
|
||||
/// and the word index lower than 2^22.
|
||||
fn new(attribute: u16, index: u32) -> Attribute {
|
||||
assert!(attribute & 0b1111_1100_0000_0000 == 0);
|
||||
assert!(index & 0b1111_1111_1100_0000_0000_0000_0000 == 0);
|
||||
|
||||
let attribute = (attribute as u32) << 22;
|
||||
Attribute(attribute | index)
|
||||
fn new_faillible(attribute: u16, index: u32) -> Attribute {
|
||||
match Attribute::new(attribute, index) {
|
||||
Ok(attribute) => attribute,
|
||||
Err(AttributeError::AttributeTooBig) => {
|
||||
panic!("attribute must not be greater than 1024")
|
||||
},
|
||||
Err(AttributeError::IndexTooBig) => {
|
||||
panic!("attribute word index must not be greater than 2^22")
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn attribute(&self) -> u16 {
|
||||
@ -62,6 +81,11 @@ impl fmt::Debug for Attribute {
|
||||
}
|
||||
}
|
||||
|
||||
/// The reasons why an `Attribute` cannot be constructed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum AttributeError {
    /// The attribute number was rejected for being too big.
    AttributeTooBig,
    /// The word index was rejected for being too big.
    IndexTooBig,
}

impl fmt::Display for AttributeError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match *self {
            AttributeError::AttributeTooBig => "attribute number is too big",
            AttributeError::IndexTooBig => "attribute word index is too big",
        };

        f.write_str(message)
    }
}

// Lets callers propagate the error with `?` into a boxed `Error`.
impl std::error::Error for AttributeError {}
|
||||
|
||||
/// Represent a word position in bytes along with the length of it.
|
||||
///
|
||||
/// It can represent words byte index to maximum 2^22 and
|
||||
@ -77,12 +101,32 @@ impl WordArea {
|
||||
///
|
||||
/// The byte index must be lower than 2^22
|
||||
/// and the length lower than 1024.
|
||||
fn new(byte_index: u32, length: u16) -> WordArea {
|
||||
fn new(byte_index: u32, length: u16) -> Result<WordArea, WordAreaError> {
|
||||
assert!(byte_index & 0b1111_1111_1100_0000_0000_0000_0000 == 0);
|
||||
assert!(length & 0b1111_1100_0000_0000 == 0);
|
||||
|
||||
if byte_index & 0b1111_1111_1100_0000_0000_0000_0000 != 0 {
|
||||
return Err(WordAreaError::ByteIndexTooBig)
|
||||
}
|
||||
|
||||
if length & 0b1111_1100_0000_0000 != 0 {
|
||||
return Err(WordAreaError::LengthTooBig)
|
||||
}
|
||||
|
||||
let byte_index = byte_index << 10;
|
||||
WordArea(byte_index | (length as u32))
|
||||
Ok(WordArea(byte_index | (length as u32)))
|
||||
}
|
||||
|
||||
fn new_faillible(byte_index: u32, length: u16) -> WordArea {
|
||||
match WordArea::new(byte_index, length) {
|
||||
Ok(word_area) => word_area,
|
||||
Err(WordAreaError::ByteIndexTooBig) => {
|
||||
panic!("word area byte index must not be greater than 2^22")
|
||||
},
|
||||
Err(WordAreaError::LengthTooBig) => {
|
||||
panic!("word area length must not be greater than 1024")
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn byte_index(&self) -> u32 {
|
||||
@ -103,6 +147,11 @@ impl fmt::Debug for WordArea {
|
||||
}
|
||||
}
|
||||
|
||||
/// The reasons why a `WordArea` cannot be constructed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WordAreaError {
    /// The byte index was rejected for being too big.
    ByteIndexTooBig,
    /// The length was rejected for being too big.
    LengthTooBig,
}

impl fmt::Display for WordAreaError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match *self {
            WordAreaError::ByteIndexTooBig => "word area byte index is too big",
            WordAreaError::LengthTooBig => "word area length is too big",
        };

        f.write_str(message)
    }
}

// Lets callers propagate the error with `?` into a boxed `Error`.
impl std::error::Error for WordAreaError {}
|
||||
|
||||
/// This structure represent the position of a word
|
||||
/// in a document and its attributes.
|
||||
///
|
||||
@ -166,9 +215,9 @@ impl Match {
|
||||
Match {
|
||||
query_index: 0,
|
||||
distance: 0,
|
||||
attribute: Attribute::new(0, 0),
|
||||
attribute: Attribute::new_faillible(0, 0),
|
||||
is_exact: false,
|
||||
word_area: WordArea::new(0, 0),
|
||||
word_area: WordArea::new_faillible(0, 0),
|
||||
}
|
||||
}
|
||||
|
||||
@ -200,7 +249,7 @@ mod tests {
|
||||
return TestResult::discard()
|
||||
}
|
||||
|
||||
let attribute = Attribute::new(gen_attr, gen_index);
|
||||
let attribute = Attribute::new_faillible(gen_attr, gen_index);
|
||||
|
||||
let valid_attribute = attribute.attribute() == gen_attr;
|
||||
let valid_index = attribute.word_index() == gen_index;
|
||||
@ -213,8 +262,8 @@ mod tests {
|
||||
return TestResult::discard()
|
||||
}
|
||||
|
||||
let a = Attribute::new(gen_attr, gen_index);
|
||||
let b = Attribute::new(gen_attr + 1, gen_index + 1);
|
||||
let a = Attribute::new_faillible(gen_attr, gen_index);
|
||||
let b = Attribute::new_faillible(gen_attr + 1, gen_index + 1);
|
||||
|
||||
TestResult::from_bool(a < b)
|
||||
}
|
||||
@ -224,7 +273,7 @@ mod tests {
|
||||
return TestResult::discard()
|
||||
}
|
||||
|
||||
let word_area = WordArea::new(gen_byte_index, gen_length);
|
||||
let word_area = WordArea::new_faillible(gen_byte_index, gen_length);
|
||||
|
||||
let valid_char_index = word_area.byte_index() == gen_byte_index;
|
||||
let valid_length = word_area.length() == gen_length;
|
||||
@ -237,8 +286,8 @@ mod tests {
|
||||
return TestResult::discard()
|
||||
}
|
||||
|
||||
let a = WordArea::new(gen_byte_index, gen_length);
|
||||
let b = WordArea::new(gen_byte_index + 1, gen_length + 1);
|
||||
let a = WordArea::new_faillible(gen_byte_index, gen_length);
|
||||
let b = WordArea::new_faillible(gen_byte_index + 1, gen_length + 1);
|
||||
|
||||
TestResult::from_bool(a < b)
|
||||
}
|
||||
|
@ -54,8 +54,20 @@ mod tests {
|
||||
fn one_typo_reference() {
|
||||
let doc0 = {
|
||||
let matches = vec![
|
||||
Match { query_index: 0, distance: 0, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) },
|
||||
Match { query_index: 1, distance: 0, attribute: Attribute::new(0, 2), is_exact: false, word_area: WordArea::new(0, 6) },
|
||||
Match {
|
||||
query_index: 0,
|
||||
distance: 0,
|
||||
attribute: Attribute::new_faillible(0, 0),
|
||||
is_exact: false,
|
||||
word_area: WordArea::new_faillible(0, 6)
|
||||
},
|
||||
Match {
|
||||
query_index: 1,
|
||||
distance: 0,
|
||||
attribute: Attribute::new_faillible(0, 2),
|
||||
is_exact: false,
|
||||
word_area: WordArea::new_faillible(0, 6)
|
||||
},
|
||||
];
|
||||
Document {
|
||||
id: DocumentId(0),
|
||||
@ -65,8 +77,20 @@ mod tests {
|
||||
|
||||
let doc1 = {
|
||||
let matches = vec![
|
||||
Match { query_index: 0, distance: 1, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) },
|
||||
Match { query_index: 1, distance: 0, attribute: Attribute::new(0, 2), is_exact: false, word_area: WordArea::new(0, 6) },
|
||||
Match {
|
||||
query_index: 0,
|
||||
distance: 1,
|
||||
attribute: Attribute::new_faillible(0, 0),
|
||||
is_exact: false,
|
||||
word_area: WordArea::new_faillible(0, 6)
|
||||
},
|
||||
Match {
|
||||
query_index: 1,
|
||||
distance: 0,
|
||||
attribute: Attribute::new_faillible(0, 2),
|
||||
is_exact: false,
|
||||
word_area: WordArea::new_faillible(0, 6)
|
||||
},
|
||||
];
|
||||
Document {
|
||||
id: DocumentId(1),
|
||||
@ -87,8 +111,20 @@ mod tests {
|
||||
fn no_typo() {
|
||||
let doc0 = {
|
||||
let matches = vec![
|
||||
Match { query_index: 0, distance: 0, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) },
|
||||
Match { query_index: 1, distance: 0, attribute: Attribute::new(0, 1), is_exact: false, word_area: WordArea::new(0, 6) },
|
||||
Match {
|
||||
query_index: 0,
|
||||
distance: 0,
|
||||
attribute: Attribute::new_faillible(0, 0),
|
||||
is_exact: false,
|
||||
word_area: WordArea::new_faillible(0, 6)
|
||||
},
|
||||
Match {
|
||||
query_index: 1,
|
||||
distance: 0,
|
||||
attribute: Attribute::new_faillible(0, 1),
|
||||
is_exact: false,
|
||||
word_area: WordArea::new_faillible(0, 6)
|
||||
},
|
||||
];
|
||||
Document {
|
||||
id: DocumentId(0),
|
||||
@ -98,7 +134,13 @@ mod tests {
|
||||
|
||||
let doc1 = {
|
||||
let matches = vec![
|
||||
Match { query_index: 0, distance: 0, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) },
|
||||
Match {
|
||||
query_index: 0,
|
||||
distance: 0,
|
||||
attribute: Attribute::new_faillible(0, 0),
|
||||
is_exact: false,
|
||||
word_area: WordArea::new_faillible(0, 6)
|
||||
},
|
||||
];
|
||||
Document {
|
||||
id: DocumentId(1),
|
||||
@ -119,8 +161,20 @@ mod tests {
|
||||
fn one_typo() {
|
||||
let doc0 = {
|
||||
let matches = vec![
|
||||
Match { query_index: 0, distance: 0, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) },
|
||||
Match { query_index: 1, distance: 1, attribute: Attribute::new(0, 1), is_exact: false, word_area: WordArea::new(0, 6) },
|
||||
Match {
|
||||
query_index: 0,
|
||||
distance: 0,
|
||||
attribute: Attribute::new_faillible(0, 0),
|
||||
is_exact: false,
|
||||
word_area: WordArea::new_faillible(0, 6)
|
||||
},
|
||||
Match {
|
||||
query_index: 1,
|
||||
distance: 1,
|
||||
attribute: Attribute::new_faillible(0, 1),
|
||||
is_exact: false,
|
||||
word_area: WordArea::new_faillible(0, 6)
|
||||
},
|
||||
];
|
||||
Document {
|
||||
id: DocumentId(0),
|
||||
@ -130,7 +184,13 @@ mod tests {
|
||||
|
||||
let doc1 = {
|
||||
let matches = vec![
|
||||
Match { query_index: 0, distance: 0, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) },
|
||||
Match {
|
||||
query_index: 0,
|
||||
distance: 0,
|
||||
attribute: Attribute::new_faillible(0, 0),
|
||||
is_exact: false,
|
||||
word_area: WordArea::new_faillible(0, 6)
|
||||
},
|
||||
];
|
||||
Document {
|
||||
id: DocumentId(1),
|
||||
|
@ -81,11 +81,11 @@ mod tests {
|
||||
// { id: 3, attr: 3, attr_index: 1 }
|
||||
|
||||
let matches = &[
|
||||
Match { query_index: 0, attribute: Attribute::new(0, 0), ..Match::zero() },
|
||||
Match { query_index: 1, attribute: Attribute::new(1, 0), ..Match::zero() },
|
||||
Match { query_index: 2, attribute: Attribute::new(1, 1), ..Match::zero() },
|
||||
Match { query_index: 2, attribute: Attribute::new(2, 0), ..Match::zero() },
|
||||
Match { query_index: 3, attribute: Attribute::new(3, 1), ..Match::zero() },
|
||||
Match { query_index: 0, attribute: Attribute::new_faillible(0, 0), ..Match::zero() },
|
||||
Match { query_index: 1, attribute: Attribute::new_faillible(1, 0), ..Match::zero() },
|
||||
Match { query_index: 2, attribute: Attribute::new_faillible(1, 1), ..Match::zero() },
|
||||
Match { query_index: 2, attribute: Attribute::new_faillible(2, 0), ..Match::zero() },
|
||||
Match { query_index: 3, attribute: Attribute::new_faillible(3, 1), ..Match::zero() },
|
||||
];
|
||||
|
||||
// soup -> of = 8
|
||||
@ -107,12 +107,12 @@ mod tests {
|
||||
// { id: 3, attr: 1, attr_index: 3 }
|
||||
|
||||
let matches = &[
|
||||
Match { query_index: 0, attribute: Attribute::new(0, 0), ..Match::zero() },
|
||||
Match { query_index: 0, attribute: Attribute::new(1, 0), ..Match::zero() },
|
||||
Match { query_index: 1, attribute: Attribute::new(1, 1), ..Match::zero() },
|
||||
Match { query_index: 2, attribute: Attribute::new(1, 2), ..Match::zero() },
|
||||
Match { query_index: 3, attribute: Attribute::new(0, 1), ..Match::zero() },
|
||||
Match { query_index: 3, attribute: Attribute::new(1, 3), ..Match::zero() },
|
||||
Match { query_index: 0, attribute: Attribute::new_faillible(0, 0), ..Match::zero() },
|
||||
Match { query_index: 0, attribute: Attribute::new_faillible(1, 0), ..Match::zero() },
|
||||
Match { query_index: 1, attribute: Attribute::new_faillible(1, 1), ..Match::zero() },
|
||||
Match { query_index: 2, attribute: Attribute::new_faillible(1, 2), ..Match::zero() },
|
||||
Match { query_index: 3, attribute: Attribute::new_faillible(0, 1), ..Match::zero() },
|
||||
Match { query_index: 3, attribute: Attribute::new_faillible(1, 3), ..Match::zero() },
|
||||
];
|
||||
|
||||
// soup -> of = 1
|
||||
|
Loading…
x
Reference in New Issue
Block a user