mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Merge #841
841: Unique docid bugfix r=LegendreM a=MarinPostma fix #827 Co-authored-by: mpostma <postma.marin@protonmail.com>
This commit is contained in:
commit
308630c094
@ -171,15 +171,23 @@ pub fn apply_addition<'a, 'b>(
|
|||||||
let mut new_internal_docids = Vec::with_capacity(new_documents.len());
|
let mut new_internal_docids = Vec::with_capacity(new_documents.len());
|
||||||
|
|
||||||
for mut document in new_documents {
|
for mut document in new_documents {
|
||||||
|
let external_docids_get = |docid: &str| {
|
||||||
|
match (external_docids.get(docid), new_external_docids.get(docid)) {
|
||||||
|
(_, Some(&id))
|
||||||
|
| (Some(id), _) => Some(id as u32),
|
||||||
|
(None, None) => None,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
let (internal_docid, external_docid) =
|
let (internal_docid, external_docid) =
|
||||||
extract_document_id(
|
extract_document_id(
|
||||||
&primary_key,
|
&primary_key,
|
||||||
&document,
|
&document,
|
||||||
&external_docids,
|
&external_docids_get,
|
||||||
&mut available_ids,
|
&mut available_ids,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
new_external_docids.insert(external_docid, internal_docid.0);
|
new_external_docids.insert(external_docid, internal_docid.0 as u64);
|
||||||
new_internal_docids.push(internal_docid);
|
new_internal_docids.push(internal_docid);
|
||||||
|
|
||||||
if partial {
|
if partial {
|
||||||
|
@ -6,7 +6,7 @@ use meilisearch_types::DocumentId;
|
|||||||
use ordered_float::OrderedFloat;
|
use ordered_float::OrderedFloat;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
use crate::{Number, FstMapCow};
|
use crate::Number;
|
||||||
use crate::raw_indexer::RawIndexer;
|
use crate::raw_indexer::RawIndexer;
|
||||||
use crate::serde::SerializerError;
|
use crate::serde::SerializerError;
|
||||||
use crate::store::DiscoverIds;
|
use crate::store::DiscoverIds;
|
||||||
@ -98,15 +98,17 @@ pub fn value_to_number(value: &Value) -> Option<Number> {
|
|||||||
|
|
||||||
/// Validates a string representation to be a correct document id and returns
|
/// Validates a string representation to be a correct document id and returns
|
||||||
/// the corresponding id or generate a new one, this is the way we produce documents ids.
|
/// the corresponding id or generate a new one, this is the way we produce documents ids.
|
||||||
pub fn discover_document_id(
|
pub fn discover_document_id<F>(
|
||||||
docid: &str,
|
docid: &str,
|
||||||
external_docids: &FstMapCow,
|
external_docids_get: F,
|
||||||
available_docids: &mut DiscoverIds<'_>,
|
available_docids: &mut DiscoverIds<'_>,
|
||||||
) -> Result<DocumentId, SerializerError>
|
) -> Result<DocumentId, SerializerError>
|
||||||
|
where
|
||||||
|
F: FnOnce(&str) -> Option<u32>
|
||||||
{
|
{
|
||||||
if docid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
|
if docid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
|
||||||
match external_docids.get(docid) {
|
match external_docids_get(docid) {
|
||||||
Some(id) => Ok(DocumentId(id as u32)),
|
Some(id) => Ok(DocumentId(id)),
|
||||||
None => {
|
None => {
|
||||||
let internal_id = available_docids.next().expect("no more ids available");
|
let internal_id = available_docids.next().expect("no more ids available");
|
||||||
Ok(internal_id)
|
Ok(internal_id)
|
||||||
@ -118,12 +120,14 @@ pub fn discover_document_id(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Extracts and validates the document id of a document.
|
/// Extracts and validates the document id of a document.
|
||||||
pub fn extract_document_id(
|
pub fn extract_document_id<F>(
|
||||||
primary_key: &str,
|
primary_key: &str,
|
||||||
document: &IndexMap<String, Value>,
|
document: &IndexMap<String, Value>,
|
||||||
external_docids: &FstMapCow,
|
external_docids_get: F,
|
||||||
available_docids: &mut DiscoverIds<'_>,
|
available_docids: &mut DiscoverIds<'_>,
|
||||||
) -> Result<(DocumentId, String), SerializerError>
|
) -> Result<(DocumentId, String), SerializerError>
|
||||||
|
where
|
||||||
|
F: FnOnce(&str) -> Option<u32>
|
||||||
{
|
{
|
||||||
match document.get(primary_key) {
|
match document.get(primary_key) {
|
||||||
Some(value) => {
|
Some(value) => {
|
||||||
@ -132,7 +136,7 @@ pub fn extract_document_id(
|
|||||||
Value::String(string) => string.clone(),
|
Value::String(string) => string.clone(),
|
||||||
_ => return Err(SerializerError::InvalidDocumentIdFormat),
|
_ => return Err(SerializerError::InvalidDocumentIdFormat),
|
||||||
};
|
};
|
||||||
discover_document_id(&docid, external_docids, available_docids).map(|id| (id, docid))
|
discover_document_id(&docid, external_docids_get, available_docids).map(|id| (id, docid))
|
||||||
}
|
}
|
||||||
None => Err(SerializerError::DocumentIdNotFound),
|
None => Err(SerializerError::DocumentIdNotFound),
|
||||||
}
|
}
|
||||||
|
@ -195,3 +195,23 @@ async fn add_document_with_long_field() {
|
|||||||
let (response, _status) = server.search_post(json!({ "q": "request_buffering" })).await;
|
let (response, _status) = server.search_post(json!({ "q": "request_buffering" })).await;
|
||||||
assert!(!response["hits"].as_array().unwrap().is_empty());
|
assert!(!response["hits"].as_array().unwrap().is_empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn documents_with_same_id_are_overwritten() {
|
||||||
|
let mut server = common::Server::with_uid("test");
|
||||||
|
server.create_index(json!({ "uid": "test"})).await;
|
||||||
|
let documents = json!([
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"content": "test1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"content": "test2"
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
server.add_or_replace_multiple_documents(documents).await;
|
||||||
|
let (response, _status) = server.get_all_documents().await;
|
||||||
|
assert_eq!(response.as_array().unwrap().len(), 1);
|
||||||
|
assert_eq!(response.as_array().unwrap()[0].as_object().unwrap()["content"], "test2");
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user