This commit is contained in:
ManyTheFish 2023-09-19 18:29:21 +02:00
parent d4594306d3
commit c2dcd66d32
2 changed files with 17 additions and 17 deletions

View File

@@ -69,7 +69,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
while let Some((key, value)) = cursor.move_on_next()? { while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, fid_bytes) = try_split_array_at(key) let (document_id_bytes, fid_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let (fid_bytes, _) = try_split_array_at(key) let (fid_bytes, _) = try_split_array_at(fid_bytes)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes); let document_id = u32::from_be_bytes(document_id_bytes);
fid = u16::from_be_bytes(fid_bytes); fid = u16::from_be_bytes(fid_bytes);

View File

@@ -46,25 +46,25 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes); let document_id = u32::from_be_bytes(document_id_bytes);
for (position, word) in KvReaderU16::new(&value).iter() { // if we change document, we fill the sorter
// if we change document, we fill the sorter if current_document_id.map_or(false, |id| id != document_id) {
if current_document_id.map_or(false, |id| id != document_id) { while !word_positions.is_empty() {
while !word_positions.is_empty() { word_positions_into_word_pair_proximity(
word_positions_into_word_pair_proximity( &mut word_positions,
&mut word_positions, &mut word_pair_proximity,
&mut word_pair_proximity,
)?;
}
document_word_positions_into_sorter(
document_id,
&word_pair_proximity,
&mut word_pair_proximity_docids_sorter,
)?; )?;
word_pair_proximity.clear();
word_positions.clear();
} }
document_word_positions_into_sorter(
document_id,
&word_pair_proximity,
&mut word_pair_proximity_docids_sorter,
)?;
word_pair_proximity.clear();
word_positions.clear();
}
for (position, word) in KvReaderU16::new(&value).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting. // drain the proximity window until the head word is considered close to the word we are inserting.
while word_positions.get(0).map_or(false, |(_w, p)| { while word_positions.get(0).map_or(false, |(_w, p)| {
positions_proximity(*p as u32, position as u32) > MAX_DISTANCE positions_proximity(*p as u32, position as u32) > MAX_DISTANCE