only flatten an object if it's nested

This commit is contained in:
Tamo 2022-04-12 11:22:36 +02:00 committed by Irevoire
parent c2469b6765
commit 399fba16bb
No known key found for this signature in database
GPG Key ID: 7A6A970C96104F1B
2 changed files with 23 additions and 8 deletions

View File

@ -18,6 +18,7 @@ flatten-serde-json = { path = "../flatten-serde-json" }
grenad = { version = "0.4.1", default-features = false, features = ["tempfile"] }
geoutils = "0.4.1"
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.9" }
memmap2 = "0.5.3"

View File

@ -286,9 +286,11 @@ impl<'a, 'i> Transform<'a, 'i> {
})?;
self.original_sorter.insert(&docid.to_be_bytes(), base_obkv)?;
let buffer = self.flatten_from_fields_ids_map(KvReader::new(&base_obkv))?;
self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?;
if let Some(buffer) = self.flatten_from_fields_ids_map(KvReader::new(&base_obkv))? {
self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?;
} else {
self.flattened_sorter.insert(docid.to_be_bytes(), base_obkv)?;
}
} else {
self.new_documents_ids.insert(docid);
}
@ -300,8 +302,13 @@ impl<'a, 'i> Transform<'a, 'i> {
if let Some(flatten) = flattened_document {
self.flattened_sorter.insert(docid.to_be_bytes(), &flatten)?;
} else {
let buffer = self.flatten_from_fields_ids_map(KvReader::new(&obkv_buffer))?;
self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?;
if let Some(buffer) =
self.flatten_from_fields_ids_map(KvReader::new(&obkv_buffer))?
{
self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?;
} else {
self.flattened_sorter.insert(docid.to_be_bytes(), obkv_buffer.clone())?;
}
}
progress_callback(UpdateIndexingStep::RemapDocumentAddition {
@ -326,8 +333,15 @@ impl<'a, 'i> Transform<'a, 'i> {
}
// Flatten a document from the fields ids map contained in self and insert the newly
// created fields.
fn flatten_from_fields_ids_map(&mut self, obkv: KvReader<FieldId>) -> Result<Vec<u8>> {
// created fields. Returns `None` if the document doesn't need to be flattened.
fn flatten_from_fields_ids_map(&mut self, obkv: KvReader<FieldId>) -> Result<Option<Vec<u8>>> {
if obkv
.iter()
.all(|(_, value)| !json_depth_checker::should_flatten_from_unchecked_slice(value))
{
return Ok(None);
}
let mut doc = serde_json::Map::new();
for (k, v) in obkv.iter() {
@ -357,7 +371,7 @@ impl<'a, 'i> Transform<'a, 'i> {
writer.insert(fid, &value)?;
}
Ok(buffer)
Ok(Some(buffer))
}
// Flatten a document from a field mapping generated by [create_fields_mapping]