diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
index 894a193bf..ca65f0874 100644
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@@ -67,14 +67,17 @@ pub fn extract_docid_word_positions(
 
                     for (index, token) in tokens {
                         let token = token.text().trim();
-                        key_buffer.truncate(mem::size_of::<u32>());
-                        key_buffer.extend_from_slice(token.as_bytes());
+                        if !token.is_empty() {
+                            key_buffer.truncate(mem::size_of::<u32>());
+                            key_buffer.extend_from_slice(token.as_bytes());
 
-                        let position: u32 = index
-                            .try_into()
-                            .map_err(|_| SerializationError::InvalidNumberSerialization)?;
-                        let position = field_id as u32 * ONE_ATTRIBUTE + position;
-                        docid_word_positions_sorter.insert(&key_buffer, &position.to_ne_bytes())?;
+                            let position: u32 = index
+                                .try_into()
+                                .map_err(|_| SerializationError::InvalidNumberSerialization)?;
+                            let position = field_id as u32 * ONE_ATTRIBUTE + position;
+                            docid_word_positions_sorter
+                                .insert(&key_buffer, &position.to_ne_bytes())?;
+                        }
                     }
                 }
             }
diff --git a/milli/src/update/index_documents/helpers/mod.rs b/milli/src/update/index_documents/helpers/mod.rs
index 3f38d4f25..128288982 100644
--- a/milli/src/update/index_documents/helpers/mod.rs
+++ b/milli/src/update/index_documents/helpers/mod.rs
@@ -17,7 +17,7 @@ pub use merge_functions::{
 };
 
 pub fn valid_lmdb_key(key: impl AsRef<[u8]>) -> bool {
-    key.as_ref().len() <= 511
+    key.as_ref().len() <= 511 && !key.as_ref().is_empty()
 }
 
 /// Divides one slice into two at an index, returns `None` if mid is out of bounds.