mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-23 19:57:30 +01:00
Simplify primary key inference
This commit is contained in:
parent
13c95d25aa
commit
402dcd6b2f
@ -130,8 +130,10 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
||||
MissingDocumentId { primary_key: String, document: Object },
|
||||
#[error("Document have too many matching `{}` attribute: `{}`.", .primary_key, serde_json::to_string(.document).unwrap())]
|
||||
TooManyDocumentIds { primary_key: String, document: Object },
|
||||
#[error("The primary key inference process failed because the engine did not find any fields containing `id` substring in their name. If your document identifier does not contain any `id` substring, you can set the primary key of the index.")]
|
||||
MissingPrimaryKey,
|
||||
#[error("The primary key inference process failed because the engine did not find any field ending with `id` in its name. Please specify the primary key manually using the `primaryKey` query parameter.")]
|
||||
NoPrimaryKeyCandidateFound,
|
||||
#[error("The primary key inference process failed because the engine found {} fields ending with `id` in their name, such as '{}' and '{}'. Please specify the primary key manually using the `primaryKey` query parameter.", .candidates.len(), .candidates.get(0).unwrap(), .candidates.get(1).unwrap())]
|
||||
MultiplePrimaryKeyCandidatesFound { candidates: Vec<String> },
|
||||
#[error("There is no more space left on the device. Consider increasing the size of the disk/partition.")]
|
||||
NoSpaceLeftOnDevice,
|
||||
#[error("Index already has a primary key: `{0}`.")]
|
||||
|
@ -58,17 +58,36 @@ pub fn enrich_documents_batch<R: Read + Seek>(
|
||||
}
|
||||
},
|
||||
None => {
|
||||
let guessed = documents_batch_index
|
||||
let mut guesses: Vec<(u16, &str)> = documents_batch_index
|
||||
.iter()
|
||||
.filter(|(_, name)| name.to_lowercase().contains(DEFAULT_PRIMARY_KEY))
|
||||
.min_by_key(|(fid, _)| *fid);
|
||||
match guessed {
|
||||
Some((id, name)) => PrimaryKey::flat(name.as_str(), *id),
|
||||
None if autogenerate_docids => PrimaryKey::flat(
|
||||
.filter(|(_, name)| name.to_lowercase().ends_with(DEFAULT_PRIMARY_KEY))
|
||||
.map(|(field_id, name)| (*field_id, name.as_str()))
|
||||
.collect();
|
||||
|
||||
// sort the keys in a deterministic, obvious way, so that fields are always in the same order.
|
||||
guesses.sort_by(|(_, left_name), (_, right_name)| {
|
||||
// shortest name first
|
||||
left_name.len().cmp(&right_name.len()).then_with(
|
||||
// then alphabetical order
|
||||
|| left_name.cmp(right_name),
|
||||
)
|
||||
});
|
||||
|
||||
match guesses.as_slice() {
|
||||
[] if autogenerate_docids => PrimaryKey::flat(
|
||||
DEFAULT_PRIMARY_KEY,
|
||||
documents_batch_index.insert(DEFAULT_PRIMARY_KEY),
|
||||
),
|
||||
None => return Ok(Err(UserError::MissingPrimaryKey)),
|
||||
[] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)),
|
||||
[(field_id, name)] => PrimaryKey::flat(name, *field_id),
|
||||
multiple => {
|
||||
return Ok(Err(UserError::MultiplePrimaryKeyCandidatesFound {
|
||||
candidates: multiple
|
||||
.iter()
|
||||
.map(|(_, candidate)| candidate.to_string())
|
||||
.collect(),
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user