This commit is contained in:
ManyTheFish 2023-09-19 18:29:21 +02:00 committed by Louis Dureuil
parent 8d77736a67
commit 11ea5acff9
No known key found for this signature in database
2 changed files with 17 additions and 17 deletions

View File

@ -73,7 +73,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
while let Some((key, value)) = cursor.move_on_next()? { while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, fid_bytes) = try_split_array_at(key) let (document_id_bytes, fid_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let (fid_bytes, _) = try_split_array_at(key) let (fid_bytes, _) = try_split_array_at(fid_bytes)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes); let document_id = u32::from_be_bytes(document_id_bytes);
fid = u16::from_be_bytes(fid_bytes); fid = u16::from_be_bytes(fid_bytes);

View File

@ -47,7 +47,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes); let document_id = u32::from_be_bytes(document_id_bytes);
for (position, word) in KvReaderU16::new(&value).iter() {
// if we change document, we fill the sorter // if we change document, we fill the sorter
if current_document_id.map_or(false, |id| id != document_id) { if current_document_id.map_or(false, |id| id != document_id) {
while !word_positions.is_empty() { while !word_positions.is_empty() {
@ -66,6 +65,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
word_positions.clear(); word_positions.clear();
} }
for (position, word) in KvReaderU16::new(&value).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting. // drain the proximity window until the head word is considered close to the word we are inserting.
while word_positions.get(0).map_or(false, |(_w, p)| { while word_positions.get(0).map_or(false, |(_w, p)| {
positions_proximity(*p as u32, position as u32) > MAX_DISTANCE positions_proximity(*p as u32, position as u32) > MAX_DISTANCE