2298: Nested fields r=irevoire a=irevoire

There are a few things that I want to fix _AFTER_ merging this PR,
in the following RCs.

## Stop the useless conversion
In `search.rs` I convert a `Document` to a `Value`, then the `Value` back to a `Document`, then back to a `Value` again, etc. I should stop doing all these conversions and stick to one format.
Probably by merging my `permissive-json-pointer` crate into meilisearch.
That would also give me the opportunity to work directly with obkvs and stop deserializing fields I don't need.

## Add more tests specific to nested fields
Everything seems to work, but I should write tests to double-check that nested fields work well with the `formatted` field.

## See how I could stop iterating over hashmaps and instead fill them correctly
This is related to milli. I very often need to iterate over a hashmap to see if a field is a subset of another field. I could probably generate a structure containing all the possible key values.
i.e. the user says `doggo` is an attribute to retrieve. Instead of iterating over all the attributes to retrieve to check if `doggo.name` is a subset of `doggo`, I should insert `doggo.name` into the attributes-to-retrieve map.

Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
bors[bot] 2022-04-11 11:45:37 +00:00 committed by GitHub
commit 31584f34e8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 501 additions and 261 deletions

33
Cargo.lock generated
View File

@ -1087,7 +1087,7 @@ dependencies = [
[[package]] [[package]]
name = "filter-parser" name = "filter-parser"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.25.0#4ae7aea3b274a86780754dc8bebb36e06501f894" source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.0#9ac2fd1c379d5b91c80471c23079dbba57b9a841"
dependencies = [ dependencies = [
"nom", "nom",
"nom_locate", "nom_locate",
@ -1111,6 +1111,14 @@ dependencies = [
"miniz_oxide", "miniz_oxide",
] ]
[[package]]
name = "flatten-serde-json"
version = "0.1.0"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.0#9ac2fd1c379d5b91c80471c23079dbba57b9a841"
dependencies = [
"serde_json",
]
[[package]] [[package]]
name = "float-cmp" name = "float-cmp"
version = "0.9.0" version = "0.9.0"
@ -1643,9 +1651,9 @@ dependencies = [
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.121" version = "0.2.122"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f" checksum = "ec647867e2bf0772e28c8bcde4f0d19a9216916e890543b5a03ed8ef27b8f259"
[[package]] [[package]]
name = "libgit2-sys" name = "libgit2-sys"
@ -2062,6 +2070,7 @@ dependencies = [
"once_cell", "once_cell",
"parking_lot", "parking_lot",
"paste", "paste",
"permissive-json-pointer",
"proptest", "proptest",
"proptest-derive", "proptest-derive",
"rand", "rand",
@ -2128,8 +2137,8 @@ dependencies = [
[[package]] [[package]]
name = "milli" name = "milli"
version = "0.25.0" version = "0.26.0"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.25.0#4ae7aea3b274a86780754dc8bebb36e06501f894" source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.0#9ac2fd1c379d5b91c80471c23079dbba57b9a841"
dependencies = [ dependencies = [
"bimap", "bimap",
"bincode", "bincode",
@ -2140,6 +2149,7 @@ dependencies = [
"csv", "csv",
"either", "either",
"filter-parser", "filter-parser",
"flatten-serde-json",
"fst", "fst",
"fxhash", "fxhash",
"geoutils", "geoutils",
@ -2463,6 +2473,15 @@ version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
[[package]]
name = "permissive-json-pointer"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2125f5fc44a45ffd265ce6ab343842f71df469d173f923f234e3a8df7a8f1ba6"
dependencies = [
"serde_json",
]
[[package]] [[package]]
name = "phf" name = "phf"
version = "0.10.1" version = "0.10.1"
@ -3797,9 +3816,9 @@ dependencies = [
[[package]] [[package]]
name = "webpki-roots" name = "webpki-roots"
version = "0.22.2" version = "0.22.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "552ceb903e957524388c4d3475725ff2c8b7960922063af6ce53c9a43da07449" checksum = "44d8de8415c823c8abd270ad483c6feeac771fad964890779f9a8cb24fbbc1bf"
dependencies = [ dependencies = [
"webpki", "webpki",
] ]

View File

@ -6,7 +6,7 @@ edition = "2021"
[dependencies] [dependencies]
enum-iterator = "0.7.0" enum-iterator = "0.7.0"
meilisearch-error = { path = "../meilisearch-error" } meilisearch-error = { path = "../meilisearch-error" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.25.0" } milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.0" }
rand = "0.8.4" rand = "0.8.4"
serde = { version = "1.0.136", features = ["derive"] } serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.79", features = ["preserve_order"] } serde_json = { version = "1.0.79", features = ["preserve_order"] }

View File

@ -70,11 +70,9 @@ impl<P, D> GuardedData<P, D> {
where where
P: Policy + 'static, P: Policy + 'static,
{ {
Ok(tokio::task::spawn_blocking(move || { tokio::task::spawn_blocking(move || P::authenticate(auth, token.as_ref(), index.as_deref()))
P::authenticate(auth, token.as_ref(), index.as_deref()) .await
}) .map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))?)
} }
} }

View File

@ -1013,7 +1013,7 @@ async fn error_add_documents_invalid_geo_field() {
assert_eq!(response["status"], "failed"); assert_eq!(response["status"], "failed");
let expected_error = json!({ let expected_error = json!({
"message": r#"The document with the id: `11` contains an invalid _geo field: `foobar`."#, "message": r#"The document with the id: `11` contains an invalid `_geo` field."#,
"code": "invalid_geo_field", "code": "invalid_geo_field",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_geo_field" "link": "https://docs.meilisearch.com/errors#invalid_geo_field"

View File

@ -155,7 +155,7 @@ async fn test_get_all_documents_offset() {
.await; .await;
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!(response.as_array().unwrap().len(), 20); assert_eq!(response.as_array().unwrap().len(), 20);
assert_eq!(response.as_array().unwrap()[0]["id"], 13); assert_eq!(response.as_array().unwrap()[0]["id"], 5);
} }
#[actix_rt::test] #[actix_rt::test]

View File

@ -11,29 +11,86 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([ json!([
{ {
"title": "Shazam!", "title": "Shazam!",
"id": "287947" "id": "287947",
}, },
{ {
"title": "Captain Marvel", "title": "Captain Marvel",
"id": "299537" "id": "299537",
}, },
{ {
"title": "Escape Room", "title": "Escape Room",
"id": "522681" "id": "522681",
}, },
{ "title": "How to Train Your Dragon: The Hidden World", "id": "166428" {
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428",
}, },
{ {
"title": "Glass", "title": "Glass",
"id": "450465" "id": "450465",
} }
]) ])
}); });
/// Fixture of four documents with nested fields, indexed by the search tests
/// into the `nested` index.
///
/// Every document has a numeric `id`, `father` and `mother` strings, and a
/// `cattos` field that is either a single string (document `852`) or an array
/// of strings. Three documents also carry `doggos`, an array of objects with a
/// `name` and an `age`; document `750` has no `doggos` field at all.
static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2,
},
{
"name": "buddy",
"age": 4,
},
],
"cattos": "pesti",
},
{
"id": 654,
"father": "pierre",
"mother": "sabine",
"doggos": [
{
"name": "gros bill",
"age": 8,
},
],
"cattos": ["simba", "pestiféré"],
},
{
"id": 750,
"father": "romain",
"mother": "michelle",
"cattos": ["enigma"],
},
{
"id": 951,
"father": "jean-baptiste",
"mother": "sophie",
"doggos": [
{
"name": "turbo",
"age": 5,
},
{
"name": "fast",
"age": 6,
},
],
"cattos": ["moumoute", "gomez"],
},
])
});
#[actix_rt::test] #[actix_rt::test]
async fn simple_placeholder_search() { async fn simple_placeholder_search() {
let server = Server::new().await; let server = Server::new().await;
let index = server.index("test"); let index = server.index("basic");
let documents = DOCUMENTS.clone(); let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await; index.add_documents(documents, None).await;
@ -45,6 +102,18 @@ async fn simple_placeholder_search() {
assert_eq!(response["hits"].as_array().unwrap().len(), 5); assert_eq!(response["hits"].as_array().unwrap().len(), 5);
}) })
.await; .await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
index
.search(json!({}), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 4);
})
.await;
} }
#[actix_rt::test] #[actix_rt::test]
@ -62,6 +131,18 @@ async fn simple_search() {
assert_eq!(response["hits"].as_array().unwrap().len(), 1); assert_eq!(response["hits"].as_array().unwrap().len(), 1);
}) })
.await; .await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
index
.search(json!({"q": "pesti"}), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 2);
})
.await;
} }
#[actix_rt::test] #[actix_rt::test]
@ -88,6 +169,27 @@ async fn search_multiple_params() {
}, },
) )
.await; .await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
index
.search(
json!({
"q": "pesti",
"attributesToCrop": ["catto:2"],
"attributesToHighlight": ["catto"],
"limit": 2,
"offset": 0,
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 2);
},
)
.await;
} }
#[actix_rt::test] #[actix_rt::test]
@ -114,6 +216,43 @@ async fn search_with_filter_string_notation() {
}, },
) )
.await; .await;
let index = server.index("nested");
index
.update_settings(json!({"filterableAttributes": ["cattos", "doggos.age"]}))
.await;
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(3).await;
index
.search(
json!({
"filter": "cattos = pesti"
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
assert_eq!(response["hits"][0]["id"], json!(852));
},
)
.await;
index
.search(
json!({
"filter": "doggos.age > 5"
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 2);
assert_eq!(response["hits"][0]["id"], json!(654));
assert_eq!(response["hits"][1]["id"], json!(951));
},
)
.await;
} }
#[actix_rt::test] #[actix_rt::test]
@ -170,6 +309,28 @@ async fn search_with_sort_on_numbers() {
}, },
) )
.await; .await;
let index = server.index("nested");
index
.update_settings(json!({"sortableAttributes": ["doggos.age"]}))
.await;
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(3).await;
index
.search(
json!({
"sort": ["doggos.age:asc"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 4);
},
)
.await;
} }
#[actix_rt::test] #[actix_rt::test]
@ -196,6 +357,28 @@ async fn search_with_sort_on_strings() {
}, },
) )
.await; .await;
let index = server.index("nested");
index
.update_settings(json!({"sortableAttributes": ["doggos.name"]}))
.await;
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(3).await;
index
.search(
json!({
"sort": ["doggos.name:asc"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 4);
},
)
.await;
} }
#[actix_rt::test] #[actix_rt::test]
@ -246,6 +429,85 @@ async fn search_facet_distribution() {
}, },
) )
.await; .await;
let index = server.index("nested");
index
.update_settings(json!({"filterableAttributes": ["father", "doggos.name"]}))
.await;
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(3).await;
// TODO: TAMO: fix the test
index
.search(
json!({
// "facetsDistribution": ["father", "doggos.name"]
"facetsDistribution": ["father"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
let dist = response["facetsDistribution"].as_object().unwrap();
assert_eq!(dist.len(), 1);
assert_eq!(
dist["father"],
json!({ "jean": 1, "pierre": 1, "romain": 1, "jean-baptiste": 1})
);
/*
assert_eq!(
dist["doggos.name"],
json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})
);
*/
},
)
.await;
index
.update_settings(json!({"filterableAttributes": ["doggos"]}))
.await;
index.wait_task(4).await;
index
.search(
json!({
"facetsDistribution": ["doggos.name"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
let dist = response["facetsDistribution"].as_object().unwrap();
assert_eq!(dist.len(), 1);
assert_eq!(
dist["doggos.name"],
json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})
);
},
)
.await;
index
.search(
json!({
"facetsDistribution": ["doggos"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
let dist = response["facetsDistribution"].as_object().unwrap();
dbg!(&dist);
assert_eq!(dist.len(), 2);
assert_eq!(
dist["doggos.name"],
json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})
);
assert_eq!(
dist["doggos.age"],
json!({ "2": 1, "4": 1, "5": 1, "6": 1, "8": 1})
);
},
)
.await;
} }
#[actix_rt::test] #[actix_rt::test]
@ -265,7 +527,7 @@ async fn displayed_attributes() {
.search_post(json!({ "attributesToRetrieve": ["title", "id"] })) .search_post(json!({ "attributesToRetrieve": ["title", "id"] }))
.await; .await;
assert_eq!(code, 200, "{}", response); assert_eq!(code, 200, "{}", response);
assert!(response["hits"].get("title").is_none()); assert!(response["hits"][0].get("title").is_some());
} }
#[actix_rt::test] #[actix_rt::test]

View File

@ -30,12 +30,13 @@ lazy_static = "1.4.0"
log = "0.4.14" log = "0.4.14"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-error = { path = "../meilisearch-error" } meilisearch-error = { path = "../meilisearch-error" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.25.0" } milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.0" }
mime = "0.3.16" mime = "0.3.16"
num_cpus = "1.13.1" num_cpus = "1.13.1"
obkv = "0.2.0" obkv = "0.2.0"
once_cell = "1.10.0" once_cell = "1.10.0"
parking_lot = "0.12.0" parking_lot = "0.12.0"
permissive-json-pointer = "0.2.0"
rand = "0.8.5" rand = "0.8.5"
rayon = "1.5.1" rayon = "1.5.1"
regex = "1.5.5" regex = "1.5.5"

View File

@ -146,7 +146,7 @@ impl Index {
indexer_config, indexer_config,
config, config,
|_| (), |_| (),
); )?;
builder.add_documents(documents_reader)?; builder.add_documents(documents_reader)?;
builder.execute()?; builder.execute()?;
} }

View File

@ -106,12 +106,21 @@ pub struct SearchResult {
pub exhaustive_facets_count: Option<bool>, pub exhaustive_facets_count: Option<bool>,
} }
#[derive(Copy, Clone)] #[derive(Copy, Clone, Default)]
struct FormatOptions { struct FormatOptions {
highlight: bool, highlight: bool,
crop: Option<usize>, crop: Option<usize>,
} }
impl FormatOptions {
    /// Combines two sets of formatting options into a single one.
    ///
    /// Highlighting is enabled as soon as either side asks for it. For the
    /// `crop` setting, `self` takes precedence: `other.crop` is only used when
    /// `self.crop` is `None`.
    pub fn merge(self, other: Self) -> Self {
        let highlight = self.highlight || other.highlight;
        let crop = match self.crop {
            Some(value) => Some(value),
            None => other.crop,
        };

        Self { highlight, crop }
    }
}
impl Index { impl Index {
pub fn perform_search(&self, query: SearchQuery) -> Result<SearchResult> { pub fn perform_search(&self, query: SearchQuery) -> Result<SearchResult> {
let before_search = Instant::now(); let before_search = Instant::now();
@ -231,8 +240,8 @@ impl Index {
.then(|| compute_matches(&matching_words, &document, &analyzer)); .then(|| compute_matches(&matching_words, &document, &analyzer));
let formatted = format_fields( let formatted = format_fields(
&document,
&fields_ids_map, &fields_ids_map,
obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&formatted_options, &formatted_options,
@ -471,50 +480,74 @@ fn make_document(
field_ids_map: &FieldsIdsMap, field_ids_map: &FieldsIdsMap,
obkv: obkv::KvReaderU16, obkv: obkv::KvReaderU16,
) -> Result<Document> { ) -> Result<Document> {
let mut document = Document::new(); let mut document = serde_json::Map::new();
for attr in attributes_to_retrieve { // recreate the original json
if let Some(value) = obkv.get(*attr) { for (key, value) in obkv.iter() {
let value = serde_json::from_slice(value)?; let value = serde_json::from_slice(value)?;
let key = field_ids_map
.name(key)
.expect("Missing field name")
.to_string();
// This unwrap must be safe since we got the ids from the fields_ids_map just document.insert(key, value);
// before.
let key = field_ids_map
.name(*attr)
.expect("Missing field name")
.to_string();
document.insert(key, value);
}
} }
// select the attributes to retrieve
let attributes_to_retrieve = attributes_to_retrieve
.iter()
.map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
let document = permissive_json_pointer::select_values(&document, attributes_to_retrieve);
// then we need to convert the `serde_json::Map` into an `IndexMap`.
let document = document.into_iter().collect();
Ok(document) Ok(document)
} }
fn format_fields<A: AsRef<[u8]>>( fn format_fields<A: AsRef<[u8]>>(
document: &Document,
field_ids_map: &FieldsIdsMap, field_ids_map: &FieldsIdsMap,
obkv: obkv::KvReaderU16,
formatter: &Formatter<A>, formatter: &Formatter<A>,
matching_words: &impl Matcher, matching_words: &impl Matcher,
formatted_options: &BTreeMap<FieldId, FormatOptions>, formatted_options: &BTreeMap<FieldId, FormatOptions>,
) -> Result<Document> { ) -> Result<Document> {
let mut document = Document::new(); // Convert the `IndexMap` into a `serde_json::Map`.
let document = document
.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
for (id, format) in formatted_options { let selectors: Vec<_> = formatted_options
if let Some(value) = obkv.get(*id) { .keys()
let mut value: Value = serde_json::from_slice(value)?; // This unwrap must be safe since we got the ids from the fields_ids_map just
// before.
.map(|&fid| field_ids_map.name(fid).unwrap())
.collect();
value = formatter.format_value(value, matching_words, *format); let mut document = permissive_json_pointer::select_values(&document, selectors.iter().copied());
// This unwrap must be safe since we got the ids from the fields_ids_map just permissive_json_pointer::map_leaf_values(&mut document, selectors, |key, value| {
// before. // To get the formatting option of each key we need to see all the rules that applies
let key = field_ids_map // to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
.name(*id) // and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
.expect("Missing field name") // highlighted.
.to_string(); let format = formatted_options
.iter()
.filter(|(field, _option)| {
let name = field_ids_map.name(**field).unwrap();
milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
})
.fold(FormatOptions::default(), |acc, (_, option)| {
acc.merge(*option)
});
// TODO: remove this useless clone
*value = formatter.format_value(value.clone(), matching_words, format);
});
document.insert(key, value); // we need to convert back the `serde_json::Map` into an `IndexMap`.
} let document = document.into_iter().collect();
}
Ok(document) Ok(document)
} }
@ -798,23 +831,27 @@ mod test {
); );
let mut fields = FieldsIdsMap::new(); let mut fields = FieldsIdsMap::new();
let id = fields.insert("test").unwrap(); fields.insert("test").unwrap();
let mut buf = Vec::new(); let document: serde_json::Value = json!({
let mut obkv = obkv::KvWriter::new(&mut buf); "test": "hello",
obkv.insert(id, Value::String("hello".into()).to_string().as_bytes()) });
.unwrap();
obkv.finish().unwrap();
let obkv = obkv::KvReader::new(&buf); // we need to convert the `serde_json::Map` into an `IndexMap`.
let document = document
.as_object()
.unwrap()
.into_iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
let formatted_options = BTreeMap::new(); let formatted_options = BTreeMap::new();
let matching_words = MatchingWords::default(); let matching_words = MatchingWords::default();
let value = format_fields( let value = format_fields(
&document,
&fields, &fields,
obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&formatted_options, &formatted_options,
@ -840,25 +877,18 @@ mod test {
let title = fields.insert("title").unwrap(); let title = fields.insert("title").unwrap();
let author = fields.insert("author").unwrap(); let author = fields.insert("author").unwrap();
let mut buf = Vec::new(); let document: serde_json::Value = json!({
let mut obkv = obkv::KvWriter::new(&mut buf); "title": "The Hobbit",
obkv.insert( "author": "J. R. R. Tolkien",
title, });
Value::String("The Hobbit".into()).to_string().as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
obkv = obkv::KvWriter::new(&mut buf);
obkv.insert(
author,
Value::String("J. R. R. Tolkien".into())
.to_string()
.as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
let obkv = obkv::KvReader::new(&buf); // we need to convert the `serde_json::Map` into an `IndexMap`.
let document = document
.as_object()
.unwrap()
.into_iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
let mut formatted_options = BTreeMap::new(); let mut formatted_options = BTreeMap::new();
formatted_options.insert( formatted_options.insert(
@ -880,8 +910,8 @@ mod test {
matching_words.insert("hobbit", Some(3)); matching_words.insert("hobbit", Some(3));
let value = format_fields( let value = format_fields(
&document,
&fields, &fields,
obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&formatted_options, &formatted_options,
@ -909,38 +939,19 @@ mod test {
let author = fields.insert("author").unwrap(); let author = fields.insert("author").unwrap();
let publication_year = fields.insert("publication_year").unwrap(); let publication_year = fields.insert("publication_year").unwrap();
let mut buf = Vec::new(); let document: serde_json::Value = json!({
let mut obkv = obkv::KvWriter::new(&mut buf); "title": "The Hobbit",
"author": "J. R. R. Tolkien",
"publication_year": 1937,
});
obkv.insert( // we need to convert the `serde_json::Map` into an `IndexMap`.
title, let document = document
Value::String("The Hobbit".into()).to_string().as_bytes(), .as_object()
) .unwrap()
.unwrap(); .into_iter()
.map(|(k, v)| (k.clone(), v.clone()))
obkv.finish().unwrap(); .collect();
obkv = obkv::KvWriter::new(&mut buf);
obkv.insert(
author,
Value::String("J. R. R. Tolkien".into())
.to_string()
.as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
obkv = obkv::KvWriter::new(&mut buf);
obkv.insert(
publication_year,
Value::Number(1937.into()).to_string().as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
let obkv = obkv::KvReader::new(&buf);
let mut formatted_options = BTreeMap::new(); let mut formatted_options = BTreeMap::new();
formatted_options.insert( formatted_options.insert(
@ -969,8 +980,8 @@ mod test {
matching_words.insert("1937", Some(4)); matching_words.insert("1937", Some(4));
let value = format_fields( let value = format_fields(
&document,
&fields, &fields,
obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&formatted_options, &formatted_options,
@ -999,23 +1010,18 @@ mod test {
let title = fields.insert("title").unwrap(); let title = fields.insert("title").unwrap();
let author = fields.insert("author").unwrap(); let author = fields.insert("author").unwrap();
let mut buf = Vec::new(); let document: serde_json::Value = json!({
let mut obkv = obkv::KvWriter::new(&mut buf); "title": "Go💼od luck.",
obkv.insert( "author": "JacobLey",
title, });
Value::String("Go💼od luck.".into()).to_string().as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
obkv = obkv::KvWriter::new(&mut buf);
obkv.insert(
author,
Value::String("JacobLey".into()).to_string().as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
let obkv = obkv::KvReader::new(&buf); // we need to convert the `serde_json::Map` into an `IndexMap`.
let document = document
.as_object()
.unwrap()
.into_iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
let mut formatted_options = BTreeMap::new(); let mut formatted_options = BTreeMap::new();
formatted_options.insert( formatted_options.insert(
@ -1039,8 +1045,8 @@ mod test {
matching_words.insert("gobriefcase od", Some(11)); matching_words.insert("gobriefcase od", Some(11));
let value = format_fields( let value = format_fields(
&document,
&fields, &fields,
obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&formatted_options, &formatted_options,
@ -1067,22 +1073,18 @@ mod test {
let title = fields.insert("title").unwrap(); let title = fields.insert("title").unwrap();
let author = fields.insert("author").unwrap(); let author = fields.insert("author").unwrap();
let mut buf = Vec::new(); let document: serde_json::Value = json!({
let mut obkv = obkv::KvWriter::new(&mut buf); "title": "étoile",
obkv.insert(title, Value::String("étoile".into()).to_string().as_bytes()) "author": "J. R. R. Tolkien",
.unwrap(); });
obkv.finish().unwrap();
obkv = obkv::KvWriter::new(&mut buf);
obkv.insert(
author,
Value::String("J. R. R. Tolkien".into())
.to_string()
.as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
let obkv = obkv::KvReader::new(&buf); // we need to convert the `serde_json::Map` into an `IndexMap`.
let document = document
.as_object()
.unwrap()
.into_iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
let mut formatted_options = BTreeMap::new(); let mut formatted_options = BTreeMap::new();
formatted_options.insert( formatted_options.insert(
@ -1104,8 +1106,8 @@ mod test {
matching_words.insert("etoile", Some(1)); matching_words.insert("etoile", Some(1));
let value = format_fields( let value = format_fields(
&document,
&fields, &fields,
obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&formatted_options, &formatted_options,
@ -1132,25 +1134,18 @@ mod test {
let title = fields.insert("title").unwrap(); let title = fields.insert("title").unwrap();
let author = fields.insert("author").unwrap(); let author = fields.insert("author").unwrap();
let mut buf = Vec::new(); let document: serde_json::Value = json!({
let mut obkv = obkv::KvWriter::new(&mut buf); "title": "Harry Potter and the Half-Blood Prince",
obkv.insert( "author": "J. K. Rowling",
title, });
Value::String("Harry Potter and the Half-Blood Prince".into())
.to_string()
.as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
obkv = obkv::KvWriter::new(&mut buf);
obkv.insert(
author,
Value::String("J. K. Rowling".into()).to_string().as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
let obkv = obkv::KvReader::new(&buf); // we need to convert the `serde_json::Map` into an `IndexMap`.
let document = document
.as_object()
.unwrap()
.into_iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
let mut formatted_options = BTreeMap::new(); let mut formatted_options = BTreeMap::new();
formatted_options.insert( formatted_options.insert(
@ -1172,8 +1167,8 @@ mod test {
matching_words.insert("potter", Some(3)); matching_words.insert("potter", Some(3));
let value = format_fields( let value = format_fields(
&document,
&fields, &fields,
obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&formatted_options, &formatted_options,
@ -1200,25 +1195,18 @@ mod test {
let title = fields.insert("title").unwrap(); let title = fields.insert("title").unwrap();
let author = fields.insert("author").unwrap(); let author = fields.insert("author").unwrap();
let mut buf = Vec::new(); let document: serde_json::Value = json!({
let mut obkv = obkv::KvWriter::new(&mut buf); "title": "Harry Potter and the Half-Blood Prince",
obkv.insert( "author": "J. K. Rowling",
title, });
Value::String("Harry Potter and the Half-Blood Prince".into())
.to_string()
.as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
obkv = obkv::KvWriter::new(&mut buf);
obkv.insert(
author,
Value::String("J. K. Rowling".into()).to_string().as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
let obkv = obkv::KvReader::new(&buf); // we need to convert the `serde_json::Map` into an `IndexMap`.
let document = document
.as_object()
.unwrap()
.into_iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
let mut formatted_options = BTreeMap::new(); let mut formatted_options = BTreeMap::new();
formatted_options.insert( formatted_options.insert(
@ -1240,8 +1228,8 @@ mod test {
matching_words.insert("potter", Some(5)); matching_words.insert("potter", Some(5));
let value = format_fields( let value = format_fields(
&document,
&fields, &fields,
obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&formatted_options, &formatted_options,
@ -1268,25 +1256,18 @@ mod test {
let title = fields.insert("title").unwrap(); let title = fields.insert("title").unwrap();
let author = fields.insert("author").unwrap(); let author = fields.insert("author").unwrap();
let mut buf = Vec::new(); let document: serde_json::Value = json!({
let mut obkv = obkv::KvWriter::new(&mut buf); "title": "Harry Potter and the Half-Blood Prince",
obkv.insert( "author": "J. K. Rowling",
title, });
Value::String("Harry Potter and the Half-Blood Prince".into())
.to_string()
.as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
obkv = obkv::KvWriter::new(&mut buf);
obkv.insert(
author,
Value::String("J. K. Rowling".into()).to_string().as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
let obkv = obkv::KvReader::new(&buf); // we need to convert the `serde_json::Map` into an `IndexMap`.
let document = document
.as_object()
.unwrap()
.into_iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
let mut formatted_options = BTreeMap::new(); let mut formatted_options = BTreeMap::new();
formatted_options.insert( formatted_options.insert(
@ -1308,8 +1289,8 @@ mod test {
matching_words.insert("potter", Some(6)); matching_words.insert("potter", Some(6));
let value = format_fields( let value = format_fields(
&document,
&fields, &fields,
obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&formatted_options, &formatted_options,
@ -1336,25 +1317,18 @@ mod test {
let title = fields.insert("title").unwrap(); let title = fields.insert("title").unwrap();
let author = fields.insert("author").unwrap(); let author = fields.insert("author").unwrap();
let mut buf = Vec::new(); let document: serde_json::Value = json!({
let mut obkv = obkv::KvWriter::new(&mut buf); "title": "Harry Potter and the Half-Blood Prince",
obkv.insert( "author": "J. K. Rowling",
title, });
Value::String("Harry Potter and the Half-Blood Prince".into())
.to_string()
.as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
obkv = obkv::KvWriter::new(&mut buf);
obkv.insert(
author,
Value::String("J. K. Rowling".into()).to_string().as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
let obkv = obkv::KvReader::new(&buf); // we need to convert the `serde_json::Map` into an `IndexMap`.
let document = document
.as_object()
.unwrap()
.into_iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
let mut formatted_options = BTreeMap::new(); let mut formatted_options = BTreeMap::new();
formatted_options.insert( formatted_options.insert(
@ -1376,8 +1350,8 @@ mod test {
matching_words.insert("rowling", Some(3)); matching_words.insert("rowling", Some(3));
let value = format_fields( let value = format_fields(
&document,
&fields, &fields,
obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&formatted_options, &formatted_options,
@ -1404,25 +1378,18 @@ mod test {
let title = fields.insert("title").unwrap(); let title = fields.insert("title").unwrap();
let author = fields.insert("author").unwrap(); let author = fields.insert("author").unwrap();
let mut buf = Vec::new(); let document: serde_json::Value = json!({
let mut obkv = obkv::KvWriter::new(&mut buf); "title": "Harry Potter and the Half-Blood Prince",
obkv.insert( "author": "J. K. Rowling",
title, });
Value::String("Harry Potter and the Half-Blood Prince".into())
.to_string()
.as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
obkv = obkv::KvWriter::new(&mut buf);
obkv.insert(
author,
Value::String("J. K. Rowling".into()).to_string().as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
let obkv = obkv::KvReader::new(&buf); // we need to convert the `serde_json::Map` into an `IndexMap`.
let document = document
.as_object()
.unwrap()
.into_iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
let mut formatted_options = BTreeMap::new(); let mut formatted_options = BTreeMap::new();
formatted_options.insert( formatted_options.insert(
@ -1444,8 +1411,8 @@ mod test {
matching_words.insert("and", Some(3)); matching_words.insert("and", Some(3));
let value = format_fields( let value = format_fields(
&document,
&fields, &fields,
obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&formatted_options, &formatted_options,
@ -1472,25 +1439,18 @@ mod test {
let title = fields.insert("title").unwrap(); let title = fields.insert("title").unwrap();
let author = fields.insert("author").unwrap(); let author = fields.insert("author").unwrap();
let mut buf = Vec::new(); let document: serde_json::Value = json!({
let mut obkv = obkv::KvWriter::new(&mut buf); "title": "Harry Potter and the Half-Blood Prince",
obkv.insert( "author": "J. K. Rowling",
title, });
Value::String("Harry Potter and the Half-Blood Prince".into())
.to_string()
.as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
obkv = obkv::KvWriter::new(&mut buf);
obkv.insert(
author,
Value::String("J. K. Rowling".into()).to_string().as_bytes(),
)
.unwrap();
obkv.finish().unwrap();
let obkv = obkv::KvReader::new(&buf); // we need to convert the `serde_json::Map` into an `IndexMap`.
let document = document
.as_object()
.unwrap()
.into_iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
let mut formatted_options = BTreeMap::new(); let mut formatted_options = BTreeMap::new();
formatted_options.insert( formatted_options.insert(
@ -1512,8 +1472,8 @@ mod test {
matching_words.insert("blood", Some(3)); matching_words.insert("blood", Some(3));
let value = format_fields( let value = format_fields(
&document,
&fields, &fields,
obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&formatted_options, &formatted_options,

View File

@ -286,7 +286,7 @@ impl Index {
self.indexer_config.as_ref(), self.indexer_config.as_ref(),
config, config,
indexing_callback, indexing_callback,
); )?;
for content_uuid in contents.into_iter() { for content_uuid in contents.into_iter() {
let content_file = file_store.get_update(content_uuid)?; let content_file = file_store.get_update(content_uuid)?;