mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Replace hashmap by vectors in wpp
This commit is contained in:
parent
f13e076b8a
commit
1a0e962299
@ -36,7 +36,8 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
|||||||
document_change: DocumentChange,
|
document_change: DocumentChange,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut key_buffer = Vec::new();
|
let mut key_buffer = Vec::new();
|
||||||
let mut word_pair_proximity = HashMap::new();
|
let mut del_word_pair_proximity = Vec::new();
|
||||||
|
let mut add_word_pair_proximity = Vec::new();
|
||||||
let mut word_positions: VecDeque<(String, u16)> =
|
let mut word_positions: VecDeque<(String, u16)> =
|
||||||
VecDeque::with_capacity(MAX_DISTANCE as usize);
|
VecDeque::with_capacity(MAX_DISTANCE as usize);
|
||||||
|
|
||||||
@ -50,12 +51,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
|||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
&mut word_positions,
|
&mut word_positions,
|
||||||
&mut |(w1, w2), prox| {
|
&mut |(w1, w2), prox| {
|
||||||
word_pair_proximity
|
del_word_pair_proximity.push(((w1, w2), prox));
|
||||||
.entry((w1, w2))
|
|
||||||
.and_modify(|(del_p, _add_p)| {
|
|
||||||
*del_p = std::cmp::min(*del_p, prox);
|
|
||||||
})
|
|
||||||
.or_insert((prox, 0));
|
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
@ -67,12 +63,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
|||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
&mut word_positions,
|
&mut word_positions,
|
||||||
&mut |(w1, w2), prox| {
|
&mut |(w1, w2), prox| {
|
||||||
word_pair_proximity
|
del_word_pair_proximity.push(((w1, w2), prox));
|
||||||
.entry((w1, w2))
|
|
||||||
.and_modify(|(del_p, _add_p)| {
|
|
||||||
*del_p = std::cmp::min(*del_p, prox);
|
|
||||||
})
|
|
||||||
.or_insert((prox, 0));
|
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
let document = inner.new();
|
let document = inner.new();
|
||||||
@ -82,12 +73,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
|||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
&mut word_positions,
|
&mut word_positions,
|
||||||
&mut |(w1, w2), prox| {
|
&mut |(w1, w2), prox| {
|
||||||
word_pair_proximity
|
add_word_pair_proximity.push(((w1, w2), prox));
|
||||||
.entry((w1, w2))
|
|
||||||
.and_modify(|(_del_p, add_p)| {
|
|
||||||
*add_p = std::cmp::min(*add_p, prox);
|
|
||||||
})
|
|
||||||
.or_insert((0, prox));
|
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
@ -99,24 +85,25 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
|||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
&mut word_positions,
|
&mut word_positions,
|
||||||
&mut |(w1, w2), prox| {
|
&mut |(w1, w2), prox| {
|
||||||
word_pair_proximity
|
add_word_pair_proximity.push(((w1, w2), prox));
|
||||||
.entry((w1, w2))
|
|
||||||
.and_modify(|(_del_p, add_p)| {
|
|
||||||
*add_p = std::cmp::min(*add_p, prox);
|
|
||||||
})
|
|
||||||
.or_insert((0, prox));
|
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for ((w1, w2), (del_p, add_p)) in word_pair_proximity.iter() {
|
del_word_pair_proximity.sort_unstable();
|
||||||
let key = build_key(*del_p, w1, w2, &mut key_buffer);
|
del_word_pair_proximity.dedup_by(|(k1, _), (k2, _)| k1 == k2);
|
||||||
|
for ((w1, w2), prox) in del_word_pair_proximity.iter() {
|
||||||
|
let key = build_key(*prox, w1, w2, &mut key_buffer);
|
||||||
cached_sorter.insert_del_u32(key, docid)?;
|
cached_sorter.insert_del_u32(key, docid)?;
|
||||||
let key = build_key(*add_p, w1, w2, &mut key_buffer);
|
|
||||||
cached_sorter.insert_add_u32(key, docid)?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
add_word_pair_proximity.sort_unstable();
|
||||||
|
add_word_pair_proximity.dedup_by(|(k1, _), (k2, _)| k1 == k2);
|
||||||
|
for ((w1, w2), prox) in add_word_pair_proximity.iter() {
|
||||||
|
let key = build_key(*prox, w1, w2, &mut key_buffer);
|
||||||
|
cached_sorter.insert_add_u32(key, docid)?;
|
||||||
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -132,19 +119,13 @@ fn build_key<'a>(prox: u8, w1: &str, w2: &str, key_buffer: &'a mut Vec<u8>) -> &
|
|||||||
|
|
||||||
fn word_positions_into_word_pair_proximity(
|
fn word_positions_into_word_pair_proximity(
|
||||||
word_positions: &mut VecDeque<(String, u16)>,
|
word_positions: &mut VecDeque<(String, u16)>,
|
||||||
word_pair_proximity: &mut dyn FnMut((String, String), u8),
|
word_pair_proximity: &mut impl FnMut((String, String), u8),
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let (head_word, head_position) = word_positions.pop_front().unwrap();
|
let (head_word, head_position) = word_positions.pop_front().unwrap();
|
||||||
for (word, position) in word_positions.iter() {
|
for (word, position) in word_positions.iter() {
|
||||||
let prox = index_proximity(head_position as u32, *position as u32) as u8;
|
let prox = index_proximity(head_position as u32, *position as u32) as u8;
|
||||||
if prox > 0 && prox < MAX_DISTANCE as u8 {
|
if prox > 0 && prox < MAX_DISTANCE as u8 {
|
||||||
word_pair_proximity((head_word.clone(), word.clone()), prox);
|
word_pair_proximity((head_word.clone(), word.clone()), prox);
|
||||||
// word_pair_proximity
|
|
||||||
// .entry((head_word.clone(), word.clone()))
|
|
||||||
// .and_modify(|p| {
|
|
||||||
// *p = std::cmp::min(*p, prox);
|
|
||||||
// })
|
|
||||||
// .or_insert(prox);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -155,7 +136,7 @@ fn process_document_tokens(
|
|||||||
document_tokenizer: &DocumentTokenizer,
|
document_tokenizer: &DocumentTokenizer,
|
||||||
fields_ids_map: &mut GlobalFieldsIdsMap,
|
fields_ids_map: &mut GlobalFieldsIdsMap,
|
||||||
word_positions: &mut VecDeque<(String, u16)>,
|
word_positions: &mut VecDeque<(String, u16)>,
|
||||||
word_pair_proximity: &mut dyn FnMut((String, String), u8),
|
word_pair_proximity: &mut impl FnMut((String, String), u8),
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut token_fn = |_fname: &str, _fid: FieldId, pos: u16, word: &str| {
|
let mut token_fn = |_fname: &str, _fid: FieldId, pos: u16, word: &str| {
|
||||||
// drain the proximity window until the head word is considered close to the word we are inserting.
|
// drain the proximity window until the head word is considered close to the word we are inserting.
|
||||||
|
Loading…
Reference in New Issue
Block a user