mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-23 19:57:30 +01:00
Simplify primary key inference
This commit is contained in:
parent
13c95d25aa
commit
402dcd6b2f
@ -130,8 +130,10 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
|||||||
MissingDocumentId { primary_key: String, document: Object },
|
MissingDocumentId { primary_key: String, document: Object },
|
||||||
#[error("Document have too many matching `{}` attribute: `{}`.", .primary_key, serde_json::to_string(.document).unwrap())]
|
#[error("Document have too many matching `{}` attribute: `{}`.", .primary_key, serde_json::to_string(.document).unwrap())]
|
||||||
TooManyDocumentIds { primary_key: String, document: Object },
|
TooManyDocumentIds { primary_key: String, document: Object },
|
||||||
#[error("The primary key inference process failed because the engine did not find any fields containing `id` substring in their name. If your document identifier does not contain any `id` substring, you can set the primary key of the index.")]
|
#[error("The primary key inference process failed because the engine did not find any field ending with `id` in its name. Please specify the primary key manually using the `primaryKey` query parameter.")]
|
||||||
MissingPrimaryKey,
|
NoPrimaryKeyCandidateFound,
|
||||||
|
#[error("The primary key inference process failed because the engine found {} fields ending with `id` in their name, such as '{}' and '{}'. Please specify the primary key manually using the `primaryKey` query parameter.", .candidates.len(), .candidates.get(0).unwrap(), .candidates.get(1).unwrap())]
|
||||||
|
MultiplePrimaryKeyCandidatesFound { candidates: Vec<String> },
|
||||||
#[error("There is no more space left on the device. Consider increasing the size of the disk/partition.")]
|
#[error("There is no more space left on the device. Consider increasing the size of the disk/partition.")]
|
||||||
NoSpaceLeftOnDevice,
|
NoSpaceLeftOnDevice,
|
||||||
#[error("Index already has a primary key: `{0}`.")]
|
#[error("Index already has a primary key: `{0}`.")]
|
||||||
|
@ -58,17 +58,36 @@ pub fn enrich_documents_batch<R: Read + Seek>(
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
None => {
|
None => {
|
||||||
let guessed = documents_batch_index
|
let mut guesses: Vec<(u16, &str)> = documents_batch_index
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|(_, name)| name.to_lowercase().contains(DEFAULT_PRIMARY_KEY))
|
.filter(|(_, name)| name.to_lowercase().ends_with(DEFAULT_PRIMARY_KEY))
|
||||||
.min_by_key(|(fid, _)| *fid);
|
.map(|(field_id, name)| (*field_id, name.as_str()))
|
||||||
match guessed {
|
.collect();
|
||||||
Some((id, name)) => PrimaryKey::flat(name.as_str(), *id),
|
|
||||||
None if autogenerate_docids => PrimaryKey::flat(
|
// Sort the candidates deterministically (shortest name first, then alphabetically) so that they are always reported in the same order.
|
||||||
|
guesses.sort_by(|(_, left_name), (_, right_name)| {
|
||||||
|
// shortest name first
|
||||||
|
left_name.len().cmp(&right_name.len()).then_with(
|
||||||
|
// then alphabetical order
|
||||||
|
|| left_name.cmp(right_name),
|
||||||
|
)
|
||||||
|
});
|
||||||
|
|
||||||
|
match guesses.as_slice() {
|
||||||
|
[] if autogenerate_docids => PrimaryKey::flat(
|
||||||
DEFAULT_PRIMARY_KEY,
|
DEFAULT_PRIMARY_KEY,
|
||||||
documents_batch_index.insert(DEFAULT_PRIMARY_KEY),
|
documents_batch_index.insert(DEFAULT_PRIMARY_KEY),
|
||||||
),
|
),
|
||||||
None => return Ok(Err(UserError::MissingPrimaryKey)),
|
[] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)),
|
||||||
|
[(field_id, name)] => PrimaryKey::flat(name, *field_id),
|
||||||
|
multiple => {
|
||||||
|
return Ok(Err(UserError::MultiplePrimaryKeyCandidatesFound {
|
||||||
|
candidates: multiple
|
||||||
|
.iter()
|
||||||
|
.map(|(_, candidate)| candidate.to_string())
|
||||||
|
.collect(),
|
||||||
|
}));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user