This commit is contained in:
ManyTheFish 2023-09-19 18:29:21 +02:00 committed by Louis Dureuil
parent 8d77736a67
commit 11ea5acff9
No known key found for this signature in database
2 changed files with 17 additions and 17 deletions

View File

@@ -73,7 +73,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, fid_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let (fid_bytes, _) = try_split_array_at(key)
let (fid_bytes, _) = try_split_array_at(fid_bytes)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);
fid = u16::from_be_bytes(fid_bytes);

View File

@@ -47,25 +47,25 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);
for (position, word) in KvReaderU16::new(&value).iter() {
// if we change document, we fill the sorter
if current_document_id.map_or(false, |id| id != document_id) {
while !word_positions.is_empty() {
word_positions_into_word_pair_proximity(
&mut word_positions,
&mut word_pair_proximity,
)?;
}
document_word_positions_into_sorter(
document_id,
&word_pair_proximity,
&mut word_pair_proximity_docids_sorter,
// if we change document, we fill the sorter
if current_document_id.map_or(false, |id| id != document_id) {
while !word_positions.is_empty() {
word_positions_into_word_pair_proximity(
&mut word_positions,
&mut word_pair_proximity,
)?;
word_pair_proximity.clear();
word_positions.clear();
}
document_word_positions_into_sorter(
document_id,
&word_pair_proximity,
&mut word_pair_proximity_docids_sorter,
)?;
word_pair_proximity.clear();
word_positions.clear();
}
for (position, word) in KvReaderU16::new(&value).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting.
while word_positions.get(0).map_or(false, |(_w, p)| {
positions_proximity(*p as u32, position as u32) > MAX_DISTANCE