Mirror of https://github.com/meilisearch/MeiliSearch, synced 2024-12-27 07:00:05 +01:00
Merge #2298
2298: Nested fields r=irevoire a=irevoire

There are a few things that I want to fix _AFTER_ merging this PR, in the following RCs.

## Stop the useless conversions

In `search.rs` I convert a `Document` to a `Value`, then the `Value` back to a `Document`, then back to a `Value` again, etc. I should stop doing all these conversions and stick to one format, probably by merging my `permissive-json-pointer` crate into meilisearch. That would also give me the opportunity to work directly with obkvs and stop deserializing fields I don't need.

## Add more tests specific to nested fields

Everything seems to work, but I should write tests to double-check that nested fields work well with the `formatted` field.

## See how I could stop iterating on hashmaps and instead fill them correctly

This is related to milli. I very often need to iterate over a hashmap to see whether a field is a subset of another field. I could probably generate a structure containing all the possible key values up front: i.e. if the user says `doggo` is an attribute to retrieve, instead of iterating over all the attributes to retrieve to check whether `doggo.name` is a subset of `doggo`, I should insert `doggo.name` directly into the attributes-to-retrieve map (see the sketch below).

Co-authored-by: Tamo <tamo@meilisearch.com>
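A minimal sketch of the idea in the last point, with `is_nested_under` as a hypothetical, simplified stand-in for milli's `is_faceted_by`: pre-expanding the attributes-to-retrieve set with every concrete nested key turns the per-field subset scan into a single lookup.

```rust
use std::collections::HashSet;

/// Hypothetical stand-in for milli's `is_faceted_by`:
/// `doggo.name` is nested under `doggo`, and every field is nested under itself.
fn is_nested_under(field: &str, parent: &str) -> bool {
    field == parent || (field.starts_with(parent) && field[parent.len()..].starts_with('.'))
}

fn main() {
    // What the user asked to retrieve, and the concrete keys present in the documents.
    let attributes_to_retrieve: HashSet<&str> = ["doggo", "id"].into_iter().collect();
    let document_keys = ["doggo.name", "doggo.age", "id", "catto"];

    // Today: every key has to be checked against every requested attribute.
    let kept: Vec<&str> = document_keys
        .into_iter()
        .filter(|key| attributes_to_retrieve.iter().any(|attr| is_nested_under(key, attr)))
        .collect();
    assert_eq!(kept, ["doggo.name", "doggo.age", "id"]);

    // Idea from the description: fill the map once with every matching nested key,
    // so later checks are plain `contains` lookups instead of subset scans.
    let expanded: HashSet<&str> = kept.iter().copied().collect();
    assert!(expanded.contains("doggo.name") && !expanded.contains("catto"));
}
```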
This commit is contained in: commit 31584f34e8
Cargo.lock (generated): 33 changed lines
@@ -1087,7 +1087,7 @@ dependencies = [

 [[package]]
 name = "filter-parser"
 version = "0.1.0"
-source = "git+https://github.com/meilisearch/milli.git?tag=v0.25.0#4ae7aea3b274a86780754dc8bebb36e06501f894"
+source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.0#9ac2fd1c379d5b91c80471c23079dbba57b9a841"
 dependencies = [
  "nom",
  "nom_locate",

@@ -1111,6 +1111,14 @@ dependencies = [
  "miniz_oxide",
 ]

+[[package]]
+name = "flatten-serde-json"
+version = "0.1.0"
+source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.0#9ac2fd1c379d5b91c80471c23079dbba57b9a841"
+dependencies = [
+ "serde_json",
+]
+
 [[package]]
 name = "float-cmp"
 version = "0.9.0"

@@ -1643,9 +1651,9 @@ dependencies = [

 [[package]]
 name = "libc"
-version = "0.2.121"
+version = "0.2.122"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
+checksum = "ec647867e2bf0772e28c8bcde4f0d19a9216916e890543b5a03ed8ef27b8f259"

 [[package]]
 name = "libgit2-sys"

@@ -2062,6 +2070,7 @@ dependencies = [
  "once_cell",
  "parking_lot",
  "paste",
+ "permissive-json-pointer",
  "proptest",
  "proptest-derive",
  "rand",

@@ -2128,8 +2137,8 @@ dependencies = [

 [[package]]
 name = "milli"
-version = "0.25.0"
-source = "git+https://github.com/meilisearch/milli.git?tag=v0.25.0#4ae7aea3b274a86780754dc8bebb36e06501f894"
+version = "0.26.0"
+source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.0#9ac2fd1c379d5b91c80471c23079dbba57b9a841"
 dependencies = [
  "bimap",
  "bincode",

@@ -2140,6 +2149,7 @@ dependencies = [
  "csv",
  "either",
  "filter-parser",
+ "flatten-serde-json",
  "fst",
  "fxhash",
  "geoutils",

@@ -2463,6 +2473,15 @@ version = "2.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"

+[[package]]
+name = "permissive-json-pointer"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2125f5fc44a45ffd265ce6ab343842f71df469d173f923f234e3a8df7a8f1ba6"
+dependencies = [
+ "serde_json",
+]
+
 [[package]]
 name = "phf"
 version = "0.10.1"

@@ -3797,9 +3816,9 @@ dependencies = [

 [[package]]
 name = "webpki-roots"
-version = "0.22.2"
+version = "0.22.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "552ceb903e957524388c4d3475725ff2c8b7960922063af6ce53c9a43da07449"
+checksum = "44d8de8415c823c8abd270ad483c6feeac771fad964890779f9a8cb24fbbc1bf"
 dependencies = [
  "webpki",
 ]
@@ -6,7 +6,7 @@ edition = "2021"

 [dependencies]
 enum-iterator = "0.7.0"
 meilisearch-error = { path = "../meilisearch-error" }
-milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.25.0" }
+milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.0" }
 rand = "0.8.4"
 serde = { version = "1.0.136", features = ["derive"] }
 serde_json = { version = "1.0.79", features = ["preserve_order"] }
@@ -70,11 +70,9 @@ impl<P, D> GuardedData<P, D> {
     where
         P: Policy + 'static,
     {
-        Ok(tokio::task::spawn_blocking(move || {
-            P::authenticate(auth, token.as_ref(), index.as_deref())
-        })
-        .await
-        .map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))?)
+        tokio::task::spawn_blocking(move || P::authenticate(auth, token.as_ref(), index.as_deref()))
+            .await
+            .map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))
     }
 }

@@ -1013,7 +1013,7 @@ async fn error_add_documents_invalid_geo_field() {
     assert_eq!(response["status"], "failed");

     let expected_error = json!({
-        "message": r#"The document with the id: `11` contains an invalid _geo field: `foobar`."#,
+        "message": r#"The document with the id: `11` contains an invalid `_geo` field."#,
         "code": "invalid_geo_field",
         "type": "invalid_request",
         "link": "https://docs.meilisearch.com/errors#invalid_geo_field"
@@ -155,7 +155,7 @@ async fn test_get_all_documents_offset() {
         .await;
     assert_eq!(code, 200);
     assert_eq!(response.as_array().unwrap().len(), 20);
-    assert_eq!(response.as_array().unwrap()[0]["id"], 13);
+    assert_eq!(response.as_array().unwrap()[0]["id"], 5);
 }

 #[actix_rt::test]
@@ -11,29 +11,86 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
     json!([
         {
             "title": "Shazam!",
-            "id": "287947"
+            "id": "287947",
         },
         {
             "title": "Captain Marvel",
-            "id": "299537"
+            "id": "299537",
         },
         {
             "title": "Escape Room",
-            "id": "522681"
+            "id": "522681",
         },
-        { "title": "How to Train Your Dragon: The Hidden World", "id": "166428"
+        {
+            "title": "How to Train Your Dragon: The Hidden World",
+            "id": "166428",
         },
         {
             "title": "Glass",
-            "id": "450465"
+            "id": "450465",
         }
     ])
 });

+static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+    json!([
+        {
+            "id": 852,
+            "father": "jean",
+            "mother": "michelle",
+            "doggos": [
+                {
+                    "name": "bobby",
+                    "age": 2,
+                },
+                {
+                    "name": "buddy",
+                    "age": 4,
+                },
+            ],
+            "cattos": "pesti",
+        },
+        {
+            "id": 654,
+            "father": "pierre",
+            "mother": "sabine",
+            "doggos": [
+                {
+                    "name": "gros bill",
+                    "age": 8,
+                },
+            ],
+            "cattos": ["simba", "pestiféré"],
+        },
+        {
+            "id": 750,
+            "father": "romain",
+            "mother": "michelle",
+            "cattos": ["enigma"],
+        },
+        {
+            "id": 951,
+            "father": "jean-baptiste",
+            "mother": "sophie",
+            "doggos": [
+                {
+                    "name": "turbo",
+                    "age": 5,
+                },
+                {
+                    "name": "fast",
+                    "age": 6,
+                },
+            ],
+            "cattos": ["moumoute", "gomez"],
+        },
+    ])
+});
+
 #[actix_rt::test]
 async fn simple_placeholder_search() {
     let server = Server::new().await;
-    let index = server.index("test");
+    let index = server.index("basic");

     let documents = DOCUMENTS.clone();
     index.add_documents(documents, None).await;
@@ -45,6 +102,18 @@ async fn simple_placeholder_search() {
             assert_eq!(response["hits"].as_array().unwrap().len(), 5);
         })
         .await;
+
+    let index = server.index("nested");
+    let documents = NESTED_DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(1).await;
+
+    index
+        .search(json!({}), |response, code| {
+            assert_eq!(code, 200, "{}", response);
+            assert_eq!(response["hits"].as_array().unwrap().len(), 4);
+        })
+        .await;
 }

 #[actix_rt::test]
@@ -62,6 +131,18 @@ async fn simple_search() {
             assert_eq!(response["hits"].as_array().unwrap().len(), 1);
         })
         .await;
+
+    let index = server.index("nested");
+    let documents = NESTED_DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(1).await;
+
+    index
+        .search(json!({"q": "pesti"}), |response, code| {
+            assert_eq!(code, 200, "{}", response);
+            assert_eq!(response["hits"].as_array().unwrap().len(), 2);
+        })
+        .await;
 }

 #[actix_rt::test]
@@ -88,6 +169,27 @@ async fn search_multiple_params() {
             },
         )
         .await;
+
+    let index = server.index("nested");
+    let documents = NESTED_DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(1).await;
+
+    index
+        .search(
+            json!({
+                "q": "pesti",
+                "attributesToCrop": ["catto:2"],
+                "attributesToHighlight": ["catto"],
+                "limit": 2,
+                "offset": 0,
+            }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                assert_eq!(response["hits"].as_array().unwrap().len(), 2);
+            },
+        )
+        .await;
 }

 #[actix_rt::test]
@@ -114,6 +216,43 @@ async fn search_with_filter_string_notation() {
             },
         )
         .await;
+
+    let index = server.index("nested");
+
+    index
+        .update_settings(json!({"filterableAttributes": ["cattos", "doggos.age"]}))
+        .await;
+
+    let documents = NESTED_DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(3).await;
+
+    index
+        .search(
+            json!({
+                "filter": "cattos = pesti"
+            }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                assert_eq!(response["hits"].as_array().unwrap().len(), 1);
+                assert_eq!(response["hits"][0]["id"], json!(852));
+            },
+        )
+        .await;
+
+    index
+        .search(
+            json!({
+                "filter": "doggos.age > 5"
+            }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                assert_eq!(response["hits"].as_array().unwrap().len(), 2);
+                assert_eq!(response["hits"][0]["id"], json!(654));
+                assert_eq!(response["hits"][1]["id"], json!(951));
+            },
+        )
+        .await;
 }

 #[actix_rt::test]
@@ -170,6 +309,28 @@ async fn search_with_sort_on_numbers() {
             },
         )
         .await;
+
+    let index = server.index("nested");
+
+    index
+        .update_settings(json!({"sortableAttributes": ["doggos.age"]}))
+        .await;
+
+    let documents = NESTED_DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(3).await;
+
+    index
+        .search(
+            json!({
+                "sort": ["doggos.age:asc"]
+            }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                assert_eq!(response["hits"].as_array().unwrap().len(), 4);
+            },
+        )
+        .await;
 }

 #[actix_rt::test]
@@ -196,6 +357,28 @@ async fn search_with_sort_on_strings() {
             },
         )
         .await;
+
+    let index = server.index("nested");
+
+    index
+        .update_settings(json!({"sortableAttributes": ["doggos.name"]}))
+        .await;
+
+    let documents = NESTED_DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(3).await;
+
+    index
+        .search(
+            json!({
+                "sort": ["doggos.name:asc"]
+            }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                assert_eq!(response["hits"].as_array().unwrap().len(), 4);
+            },
+        )
+        .await;
 }

 #[actix_rt::test]
@@ -246,6 +429,85 @@ async fn search_facet_distribution() {
             },
         )
         .await;
+
+    let index = server.index("nested");
+
+    index
+        .update_settings(json!({"filterableAttributes": ["father", "doggos.name"]}))
+        .await;
+
+    let documents = NESTED_DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(3).await;
+
+    // TODO: TAMO: fix the test
+    index
+        .search(
+            json!({
+                // "facetsDistribution": ["father", "doggos.name"]
+                "facetsDistribution": ["father"]
+            }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                let dist = response["facetsDistribution"].as_object().unwrap();
+                assert_eq!(dist.len(), 1);
+                assert_eq!(
+                    dist["father"],
+                    json!({ "jean": 1, "pierre": 1, "romain": 1, "jean-baptiste": 1})
+                );
+                /*
+                assert_eq!(
+                    dist["doggos.name"],
+                    json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})
+                );
+                */
+            },
+        )
+        .await;
+
+    index
+        .update_settings(json!({"filterableAttributes": ["doggos"]}))
+        .await;
+    index.wait_task(4).await;
+
+    index
+        .search(
+            json!({
+                "facetsDistribution": ["doggos.name"]
+            }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                let dist = response["facetsDistribution"].as_object().unwrap();
+                assert_eq!(dist.len(), 1);
+                assert_eq!(
+                    dist["doggos.name"],
+                    json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})
+                );
+            },
+        )
+        .await;
+
+    index
+        .search(
+            json!({
+                "facetsDistribution": ["doggos"]
+            }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                let dist = response["facetsDistribution"].as_object().unwrap();
+                dbg!(&dist);
+                assert_eq!(dist.len(), 2);
+                assert_eq!(
+                    dist["doggos.name"],
+                    json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})
+                );
+                assert_eq!(
+                    dist["doggos.age"],
+                    json!({ "2": 1, "4": 1, "5": 1, "6": 1, "8": 1})
+                );
+            },
+        )
+        .await;
 }

 #[actix_rt::test]
@@ -265,7 +527,7 @@ async fn displayed_attributes() {
         .search_post(json!({ "attributesToRetrieve": ["title", "id"] }))
         .await;
     assert_eq!(code, 200, "{}", response);
-    assert!(response["hits"].get("title").is_none());
+    assert!(response["hits"][0].get("title").is_some());
 }

 #[actix_rt::test]
@@ -30,12 +30,13 @@ lazy_static = "1.4.0"
 log = "0.4.14"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-error = { path = "../meilisearch-error" }
-milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.25.0" }
+milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.0" }
 mime = "0.3.16"
 num_cpus = "1.13.1"
 obkv = "0.2.0"
 once_cell = "1.10.0"
 parking_lot = "0.12.0"
+permissive-json-pointer = "0.2.0"
 rand = "0.8.5"
 rayon = "1.5.1"
 regex = "1.5.5"
@@ -146,7 +146,7 @@ impl Index {
             indexer_config,
             config,
             |_| (),
-        );
+        )?;
         builder.add_documents(documents_reader)?;
         builder.execute()?;
     }
@@ -106,12 +106,21 @@ pub struct SearchResult {
     pub exhaustive_facets_count: Option<bool>,
 }

-#[derive(Copy, Clone)]
+#[derive(Copy, Clone, Default)]
 struct FormatOptions {
     highlight: bool,
     crop: Option<usize>,
 }

+impl FormatOptions {
+    pub fn merge(self, other: Self) -> Self {
+        Self {
+            highlight: self.highlight || other.highlight,
+            crop: self.crop.or(other.crop),
+        }
+    }
+}
+
 impl Index {
     pub fn perform_search(&self, query: SearchQuery) -> Result<SearchResult> {
         let before_search = Instant::now();
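For illustration only (not part of the diff), a sketch of how `merge` resolves overlapping rules such as "highlight `doggo`" plus "crop `doggo.name`"; the struct is copied from the hunk above, with extra derives added only for the assertion.

```rust
// Sketch: FormatOptions as introduced above, with Debug/PartialEq added for assert_eq!.
#[derive(Copy, Clone, Default, Debug, PartialEq)]
struct FormatOptions {
    highlight: bool,
    crop: Option<usize>,
}

impl FormatOptions {
    pub fn merge(self, other: Self) -> Self {
        Self {
            highlight: self.highlight || other.highlight,
            crop: self.crop.or(other.crop),
        }
    }
}

fn main() {
    let on_doggo = FormatOptions { highlight: true, crop: None }; // rule declared for `doggo`
    let on_doggo_name = FormatOptions { highlight: false, crop: Some(2) }; // rule for `doggo.name`

    // Folding every applicable rule into the default is what the new `format_fields`
    // does for each leaf it visits: `doggo.name` ends up highlighted and cropped.
    let merged = [on_doggo, on_doggo_name]
        .into_iter()
        .fold(FormatOptions::default(), FormatOptions::merge);
    assert_eq!(merged, FormatOptions { highlight: true, crop: Some(2) });
}
```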
@@ -231,8 +240,8 @@ impl Index {
             .then(|| compute_matches(&matching_words, &document, &analyzer));

         let formatted = format_fields(
+            &document,
             &fields_ids_map,
-            obkv,
             &formatter,
             &matching_words,
             &formatted_options,
@@ -471,50 +480,74 @@ fn make_document(
     field_ids_map: &FieldsIdsMap,
     obkv: obkv::KvReaderU16,
 ) -> Result<Document> {
-    let mut document = Document::new();
+    let mut document = serde_json::Map::new();

-    for attr in attributes_to_retrieve {
-        if let Some(value) = obkv.get(*attr) {
-            let value = serde_json::from_slice(value)?;
+    // recreate the original json
+    for (key, value) in obkv.iter() {
+        let value = serde_json::from_slice(value)?;
+        let key = field_ids_map
+            .name(key)
+            .expect("Missing field name")
+            .to_string();

-            // This unwrap must be safe since we got the ids from the fields_ids_map just
-            // before.
-            let key = field_ids_map
-                .name(*attr)
-                .expect("Missing field name")
-                .to_string();

-            document.insert(key, value);
-        }
+        document.insert(key, value);
     }

+    // select the attributes to retrieve
+    let attributes_to_retrieve = attributes_to_retrieve
+        .iter()
+        .map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
+
+    let document = permissive_json_pointer::select_values(&document, attributes_to_retrieve);
+
+    // then we need to convert the `serde_json::Map` into an `IndexMap`.
+    let document = document.into_iter().collect();
+
     Ok(document)
 }

 fn format_fields<A: AsRef<[u8]>>(
+    document: &Document,
     field_ids_map: &FieldsIdsMap,
-    obkv: obkv::KvReaderU16,
     formatter: &Formatter<A>,
     matching_words: &impl Matcher,
     formatted_options: &BTreeMap<FieldId, FormatOptions>,
 ) -> Result<Document> {
-    let mut document = Document::new();
+    // Convert the `IndexMap` into a `serde_json::Map`.
+    let document = document
+        .iter()
+        .map(|(k, v)| (k.clone(), v.clone()))
+        .collect();

-    for (id, format) in formatted_options {
-        if let Some(value) = obkv.get(*id) {
-            let mut value: Value = serde_json::from_slice(value)?;
+    let selectors: Vec<_> = formatted_options
+        .keys()
+        // This unwrap must be safe since we got the ids from the fields_ids_map just
+        // before.
+        .map(|&fid| field_ids_map.name(fid).unwrap())
+        .collect();

-            value = formatter.format_value(value, matching_words, *format);
+    let mut document = permissive_json_pointer::select_values(&document, selectors.iter().copied());

-            // This unwrap must be safe since we got the ids from the fields_ids_map just
-            // before.
-            let key = field_ids_map
-                .name(*id)
-                .expect("Missing field name")
-                .to_string();
+    permissive_json_pointer::map_leaf_values(&mut document, selectors, |key, value| {
+        // To get the formatting option of each key we need to see all the rules that applies
+        // to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
+        // and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
+        // highlighted.
+        let format = formatted_options
+            .iter()
+            .filter(|(field, _option)| {
+                let name = field_ids_map.name(**field).unwrap();
+                milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
+            })
+            .fold(FormatOptions::default(), |acc, (_, option)| {
+                acc.merge(*option)
+            });
+        // TODO: remove this useless clone
+        *value = formatter.format_value(value.clone(), matching_words, format);
+    });

-            document.insert(key, value);
-        }
-    }
+    // we need to convert back the `serde_json::Map` into an `IndexMap`.
+    let document = document.into_iter().collect();

     Ok(document)
 }
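For illustration only (not part of the diff), a hand-rolled sketch of the dot-path selection semantics the new `make_document`/`format_fields` rely on; `select` here is a simplified stand-in for `permissive_json_pointer::select_values`, not its real API. A selector such as `doggos.name` keeps only the matching leaves while preserving the surrounding structure, and array indices never appear in the dotted path.

```rust
use serde_json::{json, Map, Value};

// Keep a leaf when its dotted path equals a selector or sits under one
// (selecting `doggos` keeps `doggos.name` and `doggos.age`).
fn is_selected(path: &str, selectors: &[&str]) -> bool {
    selectors
        .iter()
        .any(|s| path == *s || path.starts_with(&format!("{s}.")))
}

fn select(value: &Value, prefix: &str, selectors: &[&str]) -> Option<Value> {
    match value {
        Value::Object(map) => {
            let mut out = Map::new();
            for (k, v) in map {
                let path = if prefix.is_empty() { k.clone() } else { format!("{prefix}.{k}") };
                if let Some(kept) = select(v, &path, selectors) {
                    out.insert(k.clone(), kept);
                }
            }
            (!out.is_empty()).then(|| Value::Object(out))
        }
        // Array indices are not part of the dotted path, like nested documents in milli.
        Value::Array(values) => {
            let kept: Vec<_> = values.iter().filter_map(|v| select(v, prefix, selectors)).collect();
            (!kept.is_empty()).then(|| Value::Array(kept))
        }
        leaf => is_selected(prefix, selectors).then(|| leaf.clone()),
    }
}

fn main() {
    let document = json!({
        "father": "jean",
        "doggos": [{ "name": "bobby", "age": 2 }, { "name": "buddy", "age": 4 }],
    });
    assert_eq!(
        select(&document, "", &["doggos.name"]).unwrap(),
        json!({ "doggos": [{ "name": "bobby" }, { "name": "buddy" }] })
    );
    assert_eq!(select(&document, "", &["father"]).unwrap(), json!({ "father": "jean" }));
}
```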
@@ -798,23 +831,27 @@ mod test {
     );

     let mut fields = FieldsIdsMap::new();
-    let id = fields.insert("test").unwrap();
+    fields.insert("test").unwrap();

-    let mut buf = Vec::new();
-    let mut obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(id, Value::String("hello".into()).to_string().as_bytes())
-        .unwrap();
-    obkv.finish().unwrap();
+    let document: serde_json::Value = json!({
+        "test": "hello",
+    });

-    let obkv = obkv::KvReader::new(&buf);
+    // we need to convert the `serde_json::Map` into an `IndexMap`.
+    let document = document
+        .as_object()
+        .unwrap()
+        .into_iter()
+        .map(|(k, v)| (k.clone(), v.clone()))
+        .collect();

     let formatted_options = BTreeMap::new();

     let matching_words = MatchingWords::default();

     let value = format_fields(
+        &document,
         &fields,
-        obkv,
         &formatter,
         &matching_words,
         &formatted_options,
@@ -840,25 +877,18 @@ mod test {
     let title = fields.insert("title").unwrap();
     let author = fields.insert("author").unwrap();

-    let mut buf = Vec::new();
-    let mut obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        title,
-        Value::String("The Hobbit".into()).to_string().as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
-    obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        author,
-        Value::String("J. R. R. Tolkien".into())
-            .to_string()
-            .as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
+    let document: serde_json::Value = json!({
+        "title": "The Hobbit",
+        "author": "J. R. R. Tolkien",
+    });

-    let obkv = obkv::KvReader::new(&buf);
+    // we need to convert the `serde_json::Map` into an `IndexMap`.
+    let document = document
+        .as_object()
+        .unwrap()
+        .into_iter()
+        .map(|(k, v)| (k.clone(), v.clone()))
+        .collect();

     let mut formatted_options = BTreeMap::new();
     formatted_options.insert(

@@ -880,8 +910,8 @@ mod test {
     matching_words.insert("hobbit", Some(3));

     let value = format_fields(
+        &document,
         &fields,
-        obkv,
         &formatter,
         &matching_words,
         &formatted_options,
@@ -909,38 +939,19 @@ mod test {
     let author = fields.insert("author").unwrap();
     let publication_year = fields.insert("publication_year").unwrap();

-    let mut buf = Vec::new();
-    let mut obkv = obkv::KvWriter::new(&mut buf);
+    let document: serde_json::Value = json!({
+        "title": "The Hobbit",
+        "author": "J. R. R. Tolkien",
+        "publication_year": 1937,
+    });

-    obkv.insert(
-        title,
-        Value::String("The Hobbit".into()).to_string().as_bytes(),
-    )
-    .unwrap();
-
-    obkv.finish().unwrap();
-    obkv = obkv::KvWriter::new(&mut buf);
-
-    obkv.insert(
-        author,
-        Value::String("J. R. R. Tolkien".into())
-            .to_string()
-            .as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
-
-    obkv = obkv::KvWriter::new(&mut buf);
-
-    obkv.insert(
-        publication_year,
-        Value::Number(1937.into()).to_string().as_bytes(),
-    )
-    .unwrap();
-
-    obkv.finish().unwrap();
-
-    let obkv = obkv::KvReader::new(&buf);
+    // we need to convert the `serde_json::Map` into an `IndexMap`.
+    let document = document
+        .as_object()
+        .unwrap()
+        .into_iter()
+        .map(|(k, v)| (k.clone(), v.clone()))
+        .collect();

     let mut formatted_options = BTreeMap::new();
     formatted_options.insert(

@@ -969,8 +980,8 @@ mod test {
     matching_words.insert("1937", Some(4));

     let value = format_fields(
+        &document,
         &fields,
-        obkv,
         &formatter,
         &matching_words,
         &formatted_options,
@@ -999,23 +1010,18 @@ mod test {
     let title = fields.insert("title").unwrap();
     let author = fields.insert("author").unwrap();

-    let mut buf = Vec::new();
-    let mut obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        title,
-        Value::String("Go💼od luck.".into()).to_string().as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
-    obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        author,
-        Value::String("JacobLey".into()).to_string().as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
+    let document: serde_json::Value = json!({
+        "title": "Go💼od luck.",
+        "author": "JacobLey",
+    });

-    let obkv = obkv::KvReader::new(&buf);
+    // we need to convert the `serde_json::Map` into an `IndexMap`.
+    let document = document
+        .as_object()
+        .unwrap()
+        .into_iter()
+        .map(|(k, v)| (k.clone(), v.clone()))
+        .collect();

     let mut formatted_options = BTreeMap::new();
     formatted_options.insert(

@@ -1039,8 +1045,8 @@ mod test {
     matching_words.insert("gobriefcase od", Some(11));

     let value = format_fields(
+        &document,
         &fields,
-        obkv,
         &formatter,
         &matching_words,
         &formatted_options,
@@ -1067,22 +1073,18 @@ mod test {
     let title = fields.insert("title").unwrap();
     let author = fields.insert("author").unwrap();

-    let mut buf = Vec::new();
-    let mut obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(title, Value::String("étoile".into()).to_string().as_bytes())
-        .unwrap();
-    obkv.finish().unwrap();
-    obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        author,
-        Value::String("J. R. R. Tolkien".into())
-            .to_string()
-            .as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
+    let document: serde_json::Value = json!({
+        "title": "étoile",
+        "author": "J. R. R. Tolkien",
+    });

-    let obkv = obkv::KvReader::new(&buf);
+    // we need to convert the `serde_json::Map` into an `IndexMap`.
+    let document = document
+        .as_object()
+        .unwrap()
+        .into_iter()
+        .map(|(k, v)| (k.clone(), v.clone()))
+        .collect();

     let mut formatted_options = BTreeMap::new();
     formatted_options.insert(

@@ -1104,8 +1106,8 @@ mod test {
     matching_words.insert("etoile", Some(1));

     let value = format_fields(
+        &document,
         &fields,
-        obkv,
         &formatter,
         &matching_words,
         &formatted_options,
@@ -1132,25 +1134,18 @@ mod test {
     let title = fields.insert("title").unwrap();
     let author = fields.insert("author").unwrap();

-    let mut buf = Vec::new();
-    let mut obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        title,
-        Value::String("Harry Potter and the Half-Blood Prince".into())
-            .to_string()
-            .as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
-    obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        author,
-        Value::String("J. K. Rowling".into()).to_string().as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
+    let document: serde_json::Value = json!({
+        "title": "Harry Potter and the Half-Blood Prince",
+        "author": "J. K. Rowling",
+    });

-    let obkv = obkv::KvReader::new(&buf);
+    // we need to convert the `serde_json::Map` into an `IndexMap`.
+    let document = document
+        .as_object()
+        .unwrap()
+        .into_iter()
+        .map(|(k, v)| (k.clone(), v.clone()))
+        .collect();

     let mut formatted_options = BTreeMap::new();
     formatted_options.insert(

@@ -1172,8 +1167,8 @@ mod test {
     matching_words.insert("potter", Some(3));

     let value = format_fields(
+        &document,
         &fields,
-        obkv,
         &formatter,
         &matching_words,
         &formatted_options,
@@ -1200,25 +1195,18 @@ mod test {
     let title = fields.insert("title").unwrap();
     let author = fields.insert("author").unwrap();

-    let mut buf = Vec::new();
-    let mut obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        title,
-        Value::String("Harry Potter and the Half-Blood Prince".into())
-            .to_string()
-            .as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
-    obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        author,
-        Value::String("J. K. Rowling".into()).to_string().as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
+    let document: serde_json::Value = json!({
+        "title": "Harry Potter and the Half-Blood Prince",
+        "author": "J. K. Rowling",
+    });

-    let obkv = obkv::KvReader::new(&buf);
+    // we need to convert the `serde_json::Map` into an `IndexMap`.
+    let document = document
+        .as_object()
+        .unwrap()
+        .into_iter()
+        .map(|(k, v)| (k.clone(), v.clone()))
+        .collect();

     let mut formatted_options = BTreeMap::new();
     formatted_options.insert(

@@ -1240,8 +1228,8 @@ mod test {
     matching_words.insert("potter", Some(5));

     let value = format_fields(
+        &document,
         &fields,
-        obkv,
         &formatter,
         &matching_words,
         &formatted_options,
@@ -1268,25 +1256,18 @@ mod test {
     let title = fields.insert("title").unwrap();
     let author = fields.insert("author").unwrap();

-    let mut buf = Vec::new();
-    let mut obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        title,
-        Value::String("Harry Potter and the Half-Blood Prince".into())
-            .to_string()
-            .as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
-    obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        author,
-        Value::String("J. K. Rowling".into()).to_string().as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
+    let document: serde_json::Value = json!({
+        "title": "Harry Potter and the Half-Blood Prince",
+        "author": "J. K. Rowling",
+    });

-    let obkv = obkv::KvReader::new(&buf);
+    // we need to convert the `serde_json::Map` into an `IndexMap`.
+    let document = document
+        .as_object()
+        .unwrap()
+        .into_iter()
+        .map(|(k, v)| (k.clone(), v.clone()))
+        .collect();

     let mut formatted_options = BTreeMap::new();
     formatted_options.insert(

@@ -1308,8 +1289,8 @@ mod test {
     matching_words.insert("potter", Some(6));

     let value = format_fields(
+        &document,
         &fields,
-        obkv,
         &formatter,
         &matching_words,
         &formatted_options,
@@ -1336,25 +1317,18 @@ mod test {
     let title = fields.insert("title").unwrap();
     let author = fields.insert("author").unwrap();

-    let mut buf = Vec::new();
-    let mut obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        title,
-        Value::String("Harry Potter and the Half-Blood Prince".into())
-            .to_string()
-            .as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
-    obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        author,
-        Value::String("J. K. Rowling".into()).to_string().as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
+    let document: serde_json::Value = json!({
+        "title": "Harry Potter and the Half-Blood Prince",
+        "author": "J. K. Rowling",
+    });

-    let obkv = obkv::KvReader::new(&buf);
+    // we need to convert the `serde_json::Map` into an `IndexMap`.
+    let document = document
+        .as_object()
+        .unwrap()
+        .into_iter()
+        .map(|(k, v)| (k.clone(), v.clone()))
+        .collect();

     let mut formatted_options = BTreeMap::new();
     formatted_options.insert(

@@ -1376,8 +1350,8 @@ mod test {
     matching_words.insert("rowling", Some(3));

     let value = format_fields(
+        &document,
         &fields,
-        obkv,
         &formatter,
         &matching_words,
         &formatted_options,
@@ -1404,25 +1378,18 @@ mod test {
     let title = fields.insert("title").unwrap();
     let author = fields.insert("author").unwrap();

-    let mut buf = Vec::new();
-    let mut obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        title,
-        Value::String("Harry Potter and the Half-Blood Prince".into())
-            .to_string()
-            .as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
-    obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        author,
-        Value::String("J. K. Rowling".into()).to_string().as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
+    let document: serde_json::Value = json!({
+        "title": "Harry Potter and the Half-Blood Prince",
+        "author": "J. K. Rowling",
+    });

-    let obkv = obkv::KvReader::new(&buf);
+    // we need to convert the `serde_json::Map` into an `IndexMap`.
+    let document = document
+        .as_object()
+        .unwrap()
+        .into_iter()
+        .map(|(k, v)| (k.clone(), v.clone()))
+        .collect();

     let mut formatted_options = BTreeMap::new();
     formatted_options.insert(

@@ -1444,8 +1411,8 @@ mod test {
     matching_words.insert("and", Some(3));

     let value = format_fields(
+        &document,
         &fields,
-        obkv,
         &formatter,
         &matching_words,
         &formatted_options,
@@ -1472,25 +1439,18 @@ mod test {
     let title = fields.insert("title").unwrap();
     let author = fields.insert("author").unwrap();

-    let mut buf = Vec::new();
-    let mut obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        title,
-        Value::String("Harry Potter and the Half-Blood Prince".into())
-            .to_string()
-            .as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
-    obkv = obkv::KvWriter::new(&mut buf);
-    obkv.insert(
-        author,
-        Value::String("J. K. Rowling".into()).to_string().as_bytes(),
-    )
-    .unwrap();
-    obkv.finish().unwrap();
+    let document: serde_json::Value = json!({
+        "title": "Harry Potter and the Half-Blood Prince",
+        "author": "J. K. Rowling",
+    });

-    let obkv = obkv::KvReader::new(&buf);
+    // we need to convert the `serde_json::Map` into an `IndexMap`.
+    let document = document
+        .as_object()
+        .unwrap()
+        .into_iter()
+        .map(|(k, v)| (k.clone(), v.clone()))
+        .collect();

     let mut formatted_options = BTreeMap::new();
     formatted_options.insert(

@@ -1512,8 +1472,8 @@ mod test {
     matching_words.insert("blood", Some(3));

     let value = format_fields(
+        &document,
         &fields,
-        obkv,
         &formatter,
         &matching_words,
         &formatted_options,
@@ -286,7 +286,7 @@ impl Index {
             self.indexer_config.as_ref(),
             config,
             indexing_callback,
-        );
+        )?;

         for content_uuid in contents.into_iter() {
             let content_file = file_store.get_update(content_uuid)?;