Fix undefined behaviour caused by reusing key from the database

New full snapshot:
---
source: milli/src/update/word_prefix_pair_proximity_docids.rs
---
5                a    1  [101, ]
5                a    2  [101, ]
5                am   1  [101, ]
5                b    4  [101, ]
5                be   4  [101, ]
am               a    3  [101, ]
amazing          a    1  [100, ]
amazing          a    2  [100, ]
amazing          a    3  [100, ]
amazing          an   1  [100, ]
amazing          an   2  [100, ]
amazing          b    2  [100, ]
amazing          be   2  [100, ]
an               a    1  [100, ]
an               a    2  [100, 202, ]
an               am   1  [100, ]
an               an   2  [100, ]
an               b    3  [100, ]
an               be   3  [100, ]
and              a    2  [100, ]
and              a    3  [100, ]
and              a    4  [100, ]
and              am   2  [100, ]
and              an   3  [100, ]
and              b    1  [100, ]
and              be   1  [100, ]
at               a    1  [100, 202, ]
at               a    2  [100, 101, ]
at               a    3  [100, ]
at               am   2  [100, 101, ]
at               an   1  [100, 202, ]
at               an   3  [100, ]
at               b    3  [101, ]
at               b    4  [100, ]
at               be   3  [101, ]
at               be   4  [100, ]
beautiful        a    2  [100, ]
beautiful        a    3  [100, ]
beautiful        a    4  [100, ]
beautiful        am   3  [100, ]
beautiful        an   2  [100, ]
beautiful        an   4  [100, ]
bell             a    2  [101, ]
bell             a    4  [101, ]
bell             am   4  [101, ]
extraordinary    a    2  [202, ]
extraordinary    a    3  [202, ]
extraordinary    an   2  [202, ]
house            a    3  [100, 202, ]
house            a    4  [100, 202, ]
house            am   4  [100, ]
house            an   3  [100, 202, ]
house            b    2  [100, ]
house            be   2  [100, ]
rings            a    1  [101, ]
rings            a    3  [101, ]
rings            am   3  [101, ]
rings            b    2  [101, ]
rings            be   2  [101, ]
the              a    3  [101, ]
the              b    1  [101, ]
the              be   1  [101, ]
This commit is contained in:
Loïc Lecrenier 2022-08-10 12:04:48 +02:00
parent 7309111433
commit ef75a77464
2 changed files with 4 additions and 4 deletions

View File

@@ -1,4 +1,4 @@
---
source: milli/src/update/word_prefix_pair_proximity_docids.rs
---
-53e42e513b83885139e4f6d817888561
+5ed4bf83317b10962a55ade353427bdd

View File

@@ -554,8 +554,8 @@ fn insert_into_database(
process: "get-put-merge",
}
})?;
-// safety: we don't keep references from inside the LMDB database.
-unsafe { iter.put_current(key, &val)? };
+// safety: we use the new_key, not the one from the database iterator, to avoid undefined behaviour
+unsafe { iter.put_current(new_key, &val)? };
}
_ => {
drop(iter);
@@ -579,7 +579,7 @@ pub fn write_into_lmdb_database_without_merging(
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
let mut cursor = reader.into_cursor()?;
while let Some((k, v)) = cursor.move_on_next()? {
-// safety: we don't keep references from inside the LMDB database.
+// safety: the key comes from the grenad reader, not the database
unsafe { out_iter.append(k, v)? };
}
} else {