543: Fix wrong internal ids assignments r=irevoire a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/2470

Co-authored-by: ad hoc <postma.marin@protonmail.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
bors[bot] 2022-06-08 09:19:58 +00:00 committed by GitHub
commit 306d2f37ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 53 additions and 3 deletions

View File

@ -1881,4 +1881,52 @@ mod tests {
wtxn.commit().unwrap();
}
/// Regression test for the "wrong internal ids assignments" fix
/// (meilisearch issue #2470).
///
/// Two separate batches are fed to the same `IndexDocuments` builder before
/// it is executed. Afterwards the values of the external-documents-ids map
/// must all be distinct, i.e. no two documents may share an internal id.
#[test]
fn index_documents_in_multiple_transforms() {
    let tmp = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(4096 * 100);

    // Borrow the temporary directory instead of moving it into `Index::new`:
    // a `TempDir` deletes its directory when dropped, so handing it over by
    // value would remove the directory as soon as the constructor returns,
    // while the environment is still in use by the rest of the test.
    let index = Index::new(options, &tmp).unwrap();
    let mut wtxn = index.write_txn().unwrap();
    let indexer_config = IndexerConfig::default();
    let mut builder = IndexDocuments::new(
        &mut wtxn,
        &index,
        &indexer_config,
        IndexDocumentsConfig::default(),
        |_| (),
    )
    .unwrap();

    let doc1 = documents! {[{
        "id": 228142,
        "title": "asdsad",
        "state": "automated",
        "priority": "normal",
        "public_uid": "37ccf021",
        "project_id": 78207,
        "branch_id_number": 0
    }]};

    let doc2 = documents! {[{
        "id": 228143,
        "title": "something",
        "state": "automated",
        "priority": "normal",
        "public_uid": "39c6499b",
        "project_id": 78207,
        "branch_id_number": 0
    }]};

    // Each `add_documents` call is a separate batch; both are added before
    // the single `execute` call below.
    builder.add_documents(doc1).unwrap();
    builder.add_documents(doc2).unwrap();

    builder.execute().unwrap();

    let map = index.external_documents_ids(&wtxn).unwrap().to_hash_map();
    // Collapsing the map's values into a set drops duplicates, so equal
    // lengths mean every entry has its own id.
    let ids = map.values().collect::<HashSet<_>>();

    assert_eq!(ids.len(), map.len());
}
}

View File

@ -51,6 +51,7 @@ pub struct Transform<'a, 'i> {
indexer_settings: &'a IndexerConfig,
pub autogenerate_docids: bool,
pub index_documents_method: IndexDocumentsMethod,
available_documents_ids: AvailableDocumentsIds,
original_sorter: grenad::Sorter<MergeFn>,
flattened_sorter: grenad::Sorter<MergeFn>,
@ -128,12 +129,14 @@ impl<'a, 'i> Transform<'a, 'i> {
indexer_settings.max_nb_chunks,
indexer_settings.max_memory.map(|mem| mem / 2),
);
let documents_ids = index.documents_ids(wtxn)?;
Ok(Transform {
index,
fields_ids_map: index.fields_ids_map(wtxn)?,
indexer_settings,
autogenerate_docids,
available_documents_ids: AvailableDocumentsIds::from_documents_ids(&documents_ids),
original_sorter,
flattened_sorter,
index_documents_method,
@ -156,8 +159,6 @@ impl<'a, 'i> Transform<'a, 'i> {
{
let fields_index = reader.index();
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
let documents_ids = self.index.documents_ids(wtxn)?;
let mut available_documents_ids = AvailableDocumentsIds::from_documents_ids(&documents_ids);
let mapping = create_fields_mapping(&mut self.fields_ids_map, fields_index)?;
@ -261,7 +262,8 @@ impl<'a, 'i> Transform<'a, 'i> {
// if the document has never been encountered we give it a new docid
// and push this new docid to the external documents ids builder
Entry::Vacant(entry) => {
let new_docid = available_documents_ids
let new_docid = self
.available_documents_ids
.next()
.ok_or(UserError::DocumentLimitReached)?;
entry.insert(new_docid as u64);