Skip empty normalized words

This commit is contained in:
many 2021-09-08 15:24:52 +02:00
parent d18ee58ab9
commit e54280fbfc
No known key found for this signature in database
GPG Key ID: 2CEF23B75189EACA

View File

@@ -67,14 +67,17 @@ pub fn extract_docid_word_positions<R: io::Read>(
for (index, token) in tokens {
let token = token.text().trim();
key_buffer.truncate(mem::size_of::<u32>());
key_buffer.extend_from_slice(token.as_bytes());
if !token.is_empty() {
key_buffer.truncate(mem::size_of::<u32>());
key_buffer.extend_from_slice(token.as_bytes());
let position: u32 = index
.try_into()
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
let position = field_id as u32 * ONE_ATTRIBUTE + position;
docid_word_positions_sorter.insert(&key_buffer, &position.to_ne_bytes())?;
let position: u32 = index
.try_into()
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
let position = field_id as u32 * ONE_ATTRIBUTE + position;
docid_word_positions_sorter
.insert(&key_buffer, &position.to_ne_bytes())?;
}
}
}
}