diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs
index ebdba0a8c..d96891d82 100644
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@@ -1292,7 +1292,7 @@ impl IndexScheduler {
                     || must_stop_processing.get(),
                 )?;

-                let document_ids = documents.iter().cloned().flatten().collect();
+                let document_ids = documents.iter().flatten().cloned().collect();

                 let (new_builder, user_result) = builder.remove_documents(document_ids)?;
                 builder = new_builder;
diff --git a/milli/src/external_documents_ids.rs b/milli/src/external_documents_ids.rs
index e0a71b7cd..0e4891649 100644
--- a/milli/src/external_documents_ids.rs
+++ b/milli/src/external_documents_ids.rs
@@ -1,5 +1,4 @@
 use std::collections::HashMap;
-use std::convert::TryInto;

 use heed::types::{OwnedType, Str};
 use heed::{Database, RoIter, RoTxn, RwTxn};
@@ -31,7 +30,7 @@ impl ExternalDocumentsIds {
     }

     pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
-        Ok(self.0.get(rtxn, external_id.as_ref())?.map(|x| x.get().try_into().unwrap()))
+        Ok(self.0.get(rtxn, external_id.as_ref())?.map(|x| x.get()))
     }

     /// An helper function to debug this type, returns an `HashMap` of both,
@@ -40,7 +39,7 @@ impl ExternalDocumentsIds {
         let mut map = HashMap::default();
         for result in self.0.iter(rtxn)? {
             let (external, internal) = result?;
-            map.insert(external.to_owned(), internal.get().try_into().unwrap());
+            map.insert(external.to_owned(), internal.get());
         }
         Ok(map)
     }
diff --git a/milli/src/index.rs b/milli/src/index.rs
index a52033fb6..86ef6105b 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -1376,7 +1376,7 @@ impl Index {
         rtxn: &RoTxn,
         key: &(Script, Language),
     ) -> heed::Result<Option<RoaringBitmap>> {
-        Ok(self.script_language_docids.get(rtxn, key)?)
+        self.script_language_docids.get(rtxn, key)
     }

     pub fn script_language(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Script, Vec<Language>>> {
diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
index 9895c1a64..0dcd6a42a 100644
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@@ -198,7 +198,7 @@ fn tokenizer_builder<'a>(
     }

     if let Some(script_language) = script_language {
-        tokenizer_builder.allow_list(&script_language);
+        tokenizer_builder.allow_list(script_language);
     }

     tokenizer_builder
@@ -206,6 +206,7 @@

 /// Extract words maped with their positions of a document,
 /// ensuring no Language detection mistakes was made.
+#[allow(clippy::too_many_arguments)] // FIXME: consider grouping arguments in a struct
 fn lang_safe_tokens_from_document<'a>(
     obkv: &KvReader<FieldId>,
     searchable_fields: &Option<HashSet<FieldId>>,
@@ -220,9 +221,9 @@
     let mut script_language_word_count = HashMap::new();

     tokens_from_document(
-        &obkv,
+        obkv,
         searchable_fields,
-        &tokenizer,
+        tokenizer,
         max_positions_per_attributes,
         del_add,
         buffers,
@@ -257,7 +258,7 @@

             // rerun the extraction.
             tokens_from_document(
-                &obkv,
+                obkv,
                 searchable_fields,
                 &tokenizer,
                 max_positions_per_attributes,
diff --git a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
index accf4a510..182d0c5d8 100644
--- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
@@ -45,7 +45,7 @@ pub fn extract_fid_word_count_docids(
             .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
         let document_id = u32::from_be_bytes(document_id_bytes);

-        let del_add_reader = KvReaderDelAdd::new(&value);
+        let del_add_reader = KvReaderDelAdd::new(value);
         let deletion = del_add_reader
             // get deleted words
             .get(DelAdd::Deletion)
diff --git a/milli/src/update/index_documents/extract/extract_word_docids.rs b/milli/src/update/index_documents/extract/extract_word_docids.rs
index 5266e9bff..f278012c7 100644
--- a/milli/src/update/index_documents/extract/extract_word_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_docids.rs
@@ -57,17 +57,17 @@ pub fn extract_word_docids(
         let document_id = u32::from_be_bytes(document_id_bytes);
         let fid = u16::from_be_bytes(fid_bytes);

-        let del_add_reader = KvReaderDelAdd::new(&value);
+        let del_add_reader = KvReaderDelAdd::new(value);
         // extract all unique words to remove.
         if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
-            for (_pos, word) in KvReaderU16::new(&deletion).iter() {
+            for (_pos, word) in KvReaderU16::new(deletion).iter() {
                 del_words.insert(word.to_vec());
             }
         }

         // extract all unique additional words.
         if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
-            for (_pos, word) in KvReaderU16::new(&addition).iter() {
+            for (_pos, word) in KvReaderU16::new(addition).iter() {
                 add_words.insert(word.to_vec());
             }
         }
@@ -122,9 +122,9 @@

         // every words contained in an attribute set to exact must be pushed in the exact_words list.
         if exact_attributes.contains(&fid) {
-            exact_word_docids_sorter.insert(word.as_bytes(), &value)?;
+            exact_word_docids_sorter.insert(word.as_bytes(), value)?;
         } else {
-            word_docids_sorter.insert(word.as_bytes(), &value)?;
+            word_docids_sorter.insert(word.as_bytes(), value)?;
         }
     }
@@ -169,7 +169,7 @@ fn words_into_sorter(
         };

         key_buffer.clear();
-        key_buffer.extend_from_slice(&word_bytes);
+        key_buffer.extend_from_slice(word_bytes);
         key_buffer.push(0);
         key_buffer.extend_from_slice(&fid.to_be_bytes());
         word_fid_docids_sorter.insert(&key_buffer, value_writer.into_inner().unwrap())?;
diff --git a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
index 76a1d1d68..b8a377247 100644
--- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
@@ -29,7 +29,6 @@ pub fn extract_word_pair_proximity_docids(
     let max_memory = indexer.max_memory_by_thread();

     let mut word_pair_proximity_docids_sorters: Vec<_> = (1..MAX_DISTANCE)
-        .into_iter()
         .map(|_| {
             create_sorter(
                 grenad::SortAlgorithm::Unstable,
@@ -75,7 +74,7 @@ pub fn extract_word_pair_proximity_docids(
         let (del, add): (Result<_>, Result<_>) = rayon::join(
             || {
                 // deletions
-                if let Some(deletion) = KvReaderDelAdd::new(&value).get(DelAdd::Deletion) {
+                if let Some(deletion) = KvReaderDelAdd::new(value).get(DelAdd::Deletion) {
                     for (position, word) in KvReaderU16::new(deletion).iter() {
                         // drain the proximity window until the head word is considered close to the word we are inserting.
                         while del_word_positions.get(0).map_or(false, |(_w, p)| {
@@ -104,7 +103,7 @@
             },
             || {
                 // additions
-                if let Some(addition) = KvReaderDelAdd::new(&value).get(DelAdd::Addition) {
+                if let Some(addition) = KvReaderDelAdd::new(value).get(DelAdd::Addition) {
                     for (position, word) in KvReaderU16::new(addition).iter() {
                         // drain the proximity window until the head word is considered close to the word we are inserting.
                         while add_word_positions.get(0).map_or(false, |(_w, p)| {
@@ -170,7 +169,7 @@ fn document_word_positions_into_sorter(
     document_id: DocumentId,
     del_word_pair_proximity: &BTreeMap<(String, String), u8>,
     add_word_pair_proximity: &BTreeMap<(String, String), u8>,
-    word_pair_proximity_docids_sorters: &mut Vec<grenad::Sorter<MergeFn>>,
+    word_pair_proximity_docids_sorters: &mut [grenad::Sorter<MergeFn>],
 ) -> Result<()> {
     use itertools::merge_join_by;
     use itertools::EitherOrBoth::{Both, Left, Right};
@@ -201,7 +200,7 @@ fn document_word_positions_into_sorter(
         };

         key_buffer.clear();
-        key_buffer.push(*prox as u8);
+        key_buffer.push(*prox);
         key_buffer.extend_from_slice(w1.as_bytes());
         key_buffer.push(0);
         key_buffer.extend_from_slice(w2.as_bytes());
diff --git a/milli/src/update/index_documents/extract/extract_word_position_docids.rs b/milli/src/update/index_documents/extract/extract_word_position_docids.rs
index 2ff2f2ad5..1b9ec66ff 100644
--- a/milli/src/update/index_documents/extract/extract_word_position_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_position_docids.rs
@@ -60,7 +60,7 @@ pub fn extract_word_position_docids(

         current_document_id = Some(document_id);

-        let del_add_reader = KvReaderDelAdd::new(&value);
+        let del_add_reader = KvReaderDelAdd::new(value);
         // extract all unique words to remove.
         if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
             for (position, word_bytes) in KvReaderU16::new(deletion).iter() {
diff --git a/milli/src/update/index_documents/helpers/merge_functions.rs b/milli/src/update/index_documents/helpers/merge_functions.rs
index 770629c8e..98c1c1a04 100644
--- a/milli/src/update/index_documents/helpers/merge_functions.rs
+++ b/milli/src/update/index_documents/helpers/merge_functions.rs
@@ -157,7 +157,7 @@ fn inner_merge_del_add_obkvs<'a>(
     let mut acc = newest[1..].to_vec();
     let mut buffer = Vec::new();
     // reverse iter from the most recent to the oldest.
-    for current in obkvs.into_iter().rev() {
+    for current in obkvs.iter().rev() {
         // if in the previous iteration there was a complete deletion,
         // stop the merge process.
         if acc_operation_type == Operation::Deletion as u8 {
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index 66e6d16dc..2be410ace 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -2659,7 +2659,7 @@ mod tests {
         let external_document_ids = index.external_documents_ids();
         let ids_to_delete: Vec<u32> = external_ids
             .iter()
-            .map(|id| external_document_ids.get(&wtxn, id).unwrap().unwrap())
+            .map(|id| external_document_ids.get(wtxn, id).unwrap().unwrap())
             .collect();

         // Delete some documents.
diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs
index 80671e39f..b53d859cd 100644
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@@ -456,7 +456,7 @@ pub(crate) fn write_typed_chunk_into_index(

                 if final_value.is_empty() {
                     // If the database entry exists, delete it.
-                    if db_key_exists == true {
+                    if db_key_exists {
                         index.script_language_docids.delete(wtxn, &key)?;
                     }
                 } else {
@@ -501,6 +501,7 @@ fn merge_word_docids_reader_into_fst(
 ///
 /// If there is no Add side we currently write an empty buffer
 /// which is a valid CboRoaringBitmap.
+#[allow(clippy::ptr_arg)] // required to avoid signature mismatch
 fn deladd_serialize_add_side<'a>(obkv: &'a [u8], _buffer: &mut Vec<u8>) -> Result<&'a [u8]> {
     Ok(KvReaderDelAdd::new(obkv).get(DelAdd::Addition).unwrap_or_default())
 }
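
Note on the `batch.rs` change: swapping `.cloned().flatten()` for `.flatten().cloned()` moves the clone from each inner collection to the individual elements. A minimal sketch of the difference, assuming `documents` is a list of per-task id batches (the concrete types here are illustrative, not the scheduler's actual ones):

```rust
// Illustrative stand-in for the scheduler's `documents` value.
fn collect_ids(documents: &[Vec<String>]) -> Vec<String> {
    // Before: `.cloned()` deep-copied every inner Vec<String> first,
    // allocating a temporary vector per batch just to flatten it away:
    //     documents.iter().cloned().flatten().collect()

    // After: `.flatten()` walks the inner vectors by reference and
    // `.cloned()` copies only the ids themselves.
    documents.iter().flatten().cloned().collect()
}

fn main() {
    let documents = vec![vec!["a".into()], vec!["b".into(), "c".into()]];
    assert_eq!(collect_ids(&documents), ["a", "b", "c"]);
}
```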
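Most of the extractor edits (`KvReaderDelAdd::new(&value)` → `KvReaderDelAdd::new(value)`, `KvReaderU16::new(&deletion)` → `KvReaderU16::new(deletion)`, and so on) fix clippy's `needless_borrow` lint: the bindings are already references, so the extra `&` builds a `&&[u8]` that the compiler immediately auto-derefs back. A standalone sketch of the pattern (the `read_len` helper is hypothetical):

```rust
// Hypothetical helper standing in for KvReaderDelAdd::new and friends,
// which all take a &[u8].
fn read_len(bytes: &[u8]) -> usize {
    bytes.len()
}

fn main() {
    // `value` is already a `&[u8]`, as in the grenad cursor loops above.
    let value: &[u8] = &[1, 2, 3];

    // Compiles, but passes a `&&[u8]` that is auto-dereffed right back:
    let _ = read_len(&value);

    // What the diff switches to: pass the reference through unchanged.
    assert_eq!(read_len(value), 3);
}
```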
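Two signature-level changes follow clippy's `ptr_arg` lint. `document_word_positions_into_sorter` now takes `&mut [grenad::Sorter<MergeFn>]`, since a mutable slice accepts a `Vec` via auto-deref while making clear the function never grows or shrinks the collection; `deladd_serialize_add_side` keeps `&mut Vec<u8>` and silences the lint instead, because its signature has to line up with a serializer callback shape. A self-contained sketch of the trade-off, with a stand-in `Sorter` type:

```rust
// Stand-in for grenad::Sorter<MergeFn>.
struct Sorter {
    entries: usize,
}

// Preferred shape: a mutable slice works for Vec, arrays, and boxed
// slices alike, and cannot change the collection's length.
fn fill_sorters(sorters: &mut [Sorter]) {
    for sorter in sorters.iter_mut() {
        sorter.entries += 1;
    }
}

// When an external callback type pins the signature, the lint is
// silenced instead of changing the signature:
#[allow(clippy::ptr_arg)]
fn serialize_into(_input: &[u8], _buffer: &mut Vec<u8>) {
    // must keep `&mut Vec<u8>` to match the expected fn pointer type
}

fn main() {
    let mut sorters = vec![Sorter { entries: 0 }, Sorter { entries: 0 }];
    fill_sorters(&mut sorters); // &mut Vec<Sorter> coerces to &mut [Sorter]
    assert_eq!(sorters[0].entries, 1);
    serialize_into(&[], &mut Vec::new());
}
```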
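The remaining edits are smaller lint fixes in the same spirit: `(1..MAX_DISTANCE).into_iter()` drops a no-op conversion because ranges are already iterators, `Ok(self.script_language_docids.get(rtxn, key)?)` unwrapped a `heed::Result` with `?` only to rewrap it, and `if db_key_exists == true` compared a bool against `true`. A compact sketch of all three (the constant value and the `lookup` helper are illustrative, not milli's):

```rust
const MAX_DISTANCE: u32 = 8; // illustrative value, not milli's constant

fn get_value(key: u32) -> Result<Option<u32>, String> {
    Ok(Some(key)) // stand-in for a heed database lookup
}

// Before: Ok(get_value(key)?) — unwrap with `?` only to rewrap in Ok.
// After: return the Result as-is.
fn lookup(key: u32) -> Result<Option<u32>, String> {
    get_value(key)
}

fn main() {
    // Ranges already implement Iterator; `.into_iter()` was a no-op.
    let sorters: Vec<u32> = (1..MAX_DISTANCE).map(|d| d * 2).collect();
    assert_eq!(sorters.len(), (MAX_DISTANCE - 1) as usize);

    // `== true` is redundant on a value that is already a bool.
    let db_key_exists = lookup(42).unwrap().is_some();
    if db_key_exists {
        // delete the entry...
    }
}
```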