From 8412be4a7ddadd648c3dca4c8215306aa8c2b0ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 5 Sep 2024 18:32:55 +0200 Subject: [PATCH] Cleanup CowStr and TopLevelMap struct --- milli/Cargo.toml | 2 +- .../update/new/indexer/document_operation.rs | 113 +++--------------- 2 files changed, 17 insertions(+), 98 deletions(-) diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 7059ed7f5..1fa754069 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -49,7 +49,7 @@ rayon = "1.10.0" roaring = { version = "0.10.6", features = ["serde"] } rstar = { version = "0.12.0", features = ["serde"] } serde = { version = "1.0.204", features = ["derive"] } -serde_json = { version = "1.0.120", features = ["preserve_order"] } +serde_json = { version = "1.0.120", features = ["preserve_order", "raw_value"] } slice-group-by = "0.3.1" smallstr = { version = "0.3.0", features = ["serde"] } smallvec = "1.13.2" diff --git a/milli/src/update/new/indexer/document_operation.rs b/milli/src/update/new/indexer/document_operation.rs index 29f36a82e..c30665f17 100644 --- a/milli/src/update/new/indexer/document_operation.rs +++ b/milli/src/update/new/indexer/document_operation.rs @@ -339,121 +339,40 @@ fn merge_document_for_updates( } } -/* - - -use std::{ - borrow::{Borrow, Cow}, - collections::BTreeMap, - ops::Deref, -}; +use std::borrow::Borrow; use serde::Deserialize; -use serde_json::{value::RawValue, Value}; -/* -#[repr(transparent)] -pub struct my_str(str); +use serde_json::from_str; +use serde_json::value::RawValue; -impl ToOwned for my_str { - type Owned = Box; - - fn to_owned(&self) -> Self::Owned { - self.0.to_string().into_boxed_str() - } -} - -impl Borrow for Box { - fn borrow(&self) -> &my_str { - unsafe { std::mem::transmute(self.as_ref()) } - } -} -*/ +#[derive(Deserialize)] +pub struct TopLevelMap<'p>(#[serde(borrow)] BTreeMap, &'p RawValue>); #[derive(Deserialize, PartialEq, Eq, PartialOrd, Ord)] -pub struct CowKey<'doc>(#[serde(borrow)] Cow<'doc, str>); +pub struct CowStr<'p>(#[serde(borrow)] Cow<'p, str>); -impl<'doc> Borrow for CowKey<'doc> { +impl<'doc> Borrow for CowStr<'doc> { fn borrow(&self) -> &str { self.0.borrow() } } -#[derive(Deserialize)] -pub struct TopLevelMap<'doc>(#[serde(borrow)] BTreeMap, &'doc RawValue>); - -#[derive(Deserialize)] -pub struct FlatDocs<'doc>(#[serde(borrow)] Vec<&'doc RawValue>); - -fn read_docs<'doc>( - ndjson: &'doc str, -) -> impl Iterator, serde_json::Error>> { - serde_json::Deserializer::from_str(ndjson).into_iter::() -} - -fn main() { - let ndjson_data = r#" - { - "id": { - "nested": "kefir" - }, - "name": "Alice", - "age": 30 - } - { - "id": { - "nested": "intel" - }, - "name\n": "Bob", - "age": 22 - } - "#; - - let primary_key: Vec<_> = "id.nested".split('.').collect(); // dynamic - - for doc in read_docs(ndjson_data) { - let doc = doc.unwrap(); - let docid = get_docid(&doc, &primary_key).unwrap().expect("missingno"); - println!("docid={docid}"); - } -} - -pub struct Document<'payload> { - fields: TopLevelMap<'payload>, - docid: String, -} - -/*impl<'payload> Document<'payload> { - pub fn get(name: &str) -> Option<&'payload RawValue> {} - - pub fn get_nested(name: &[&str]) {} -}*/ - -fn get_docid<'payload>( - map: &TopLevelMap<'payload>, +fn get_docid<'p>( + map: &TopLevelMap<'p>, primary_key: &[&str], -) -> serde_json::Result>> { +) -> serde_json::Result>> { match primary_key { - [] => unreachable!("arrrgh"), + [] => unreachable!("arrrgh"), // would None be ok? [primary_key] => match map.0.get(*primary_key) { - Some(value) => { - let value = value.get(); - let value_number: Result = serde_json::from_str(value); - Ok(Some(match value_number { - Ok(value) => CowKey(Cow::Owned(value.to_string())), - Err(_) => serde_json::from_str(value)?, - })) - } + Some(value) => match from_str::(value.get()) { + Ok(value) => Ok(Some(CowStr(Cow::Owned(value.to_string())))), + Err(_) => Ok(Some(from_str(value.get())?)), + }, None => Ok(None), }, [head, tail @ ..] => match map.0.get(*head) { - Some(value) => { - let map = serde_json::from_str(value.get())?; - get_docid(&map, tail) - } + Some(value) => get_docid(&from_str(value.get())?, tail), None => Ok(None), }, } } - - -*/