From 04fd44b5e29dcf8f1e0992b18fecd959cc5076cb Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 25 Sep 2023 18:55:20 +0200 Subject: [PATCH] Use a vecDeque in wpp database --- .../extract_word_pair_proximity_docids.rs | 37 +++---------------- 1 file changed, 6 insertions(+), 31 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs index 7c5155320..ef0899547 100644 --- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs @@ -1,5 +1,5 @@ use std::cmp::Ordering; -use std::collections::HashMap; +use std::collections::{HashMap, VecDeque}; use std::fs::File; use std::{cmp, io}; @@ -36,7 +36,8 @@ pub fn extract_word_pair_proximity_docids( max_memory.map(|m| m / 2), ); - let mut word_positions: Vec<(String, u16)> = Vec::with_capacity(MAX_DISTANCE as usize); + let mut word_positions: VecDeque<(String, u16)> = + VecDeque::with_capacity(MAX_DISTANCE as usize); let mut word_pair_proximity = HashMap::new(); let mut current_document_id = None; @@ -79,7 +80,7 @@ pub fn extract_word_pair_proximity_docids( // insert the new word. let word = std::str::from_utf8(word)?; - word_positions.push((word.to_string(), position)); + word_positions.push_back((word.to_string(), position)); } } @@ -122,10 +123,10 @@ fn document_word_positions_into_sorter( } fn word_positions_into_word_pair_proximity( - word_positions: &mut Vec<(String, u16)>, + word_positions: &mut VecDeque<(String, u16)>, word_pair_proximity: &mut HashMap<(String, String), u8>, ) -> Result<()> { - let (head_word, head_position) = word_positions.remove(0); + let (head_word, head_position) = word_positions.pop_front().unwrap(); for (word, position) in word_positions.iter() { let prox = positions_proximity(head_position as u32, *position as u32) as u8; word_pair_proximity @@ -137,29 +138,3 @@ fn word_positions_into_word_pair_proximity( } Ok(()) } - -struct PeekedWordPosition { - word: String, - position: u32, - iter: I, -} - -impl Ord for PeekedWordPosition { - fn cmp(&self, other: &Self) -> Ordering { - self.position.cmp(&other.position).reverse() - } -} - -impl PartialOrd for PeekedWordPosition { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Eq for PeekedWordPosition {} - -impl PartialEq for PeekedWordPosition { - fn eq(&self, other: &Self) -> bool { - self.position == other.position - } -}