Skip empty normalized words

This commit is contained in:
many 2021-09-08 15:24:52 +02:00
parent d18ee58ab9
commit e54280fbfc
No known key found for this signature in database
GPG Key ID: 2CEF23B75189EACA

View File

@@ -67,14 +67,17 @@ pub fn extract_docid_word_positions<R: io::Read>(
for (index, token) in tokens { for (index, token) in tokens {
let token = token.text().trim(); let token = token.text().trim();
key_buffer.truncate(mem::size_of::<u32>()); if !token.is_empty() {
key_buffer.extend_from_slice(token.as_bytes()); key_buffer.truncate(mem::size_of::<u32>());
key_buffer.extend_from_slice(token.as_bytes());
let position: u32 = index let position: u32 = index
.try_into() .try_into()
.map_err(|_| SerializationError::InvalidNumberSerialization)?; .map_err(|_| SerializationError::InvalidNumberSerialization)?;
let position = field_id as u32 * ONE_ATTRIBUTE + position; let position = field_id as u32 * ONE_ATTRIBUTE + position;
docid_word_positions_sorter.insert(&key_buffer, &position.to_ne_bytes())?; docid_word_positions_sorter
.insert(&key_buffer, &position.to_ne_bytes())?;
}
} }
} }
} }