Cleanup CowStr and TopLevelMap struct

This commit is contained in:
Clément Renault 2024-09-05 18:32:55 +02:00
parent 10f09c531f
commit 8412be4a7d
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
2 changed files with 17 additions and 98 deletions

View File

@ -49,7 +49,7 @@ rayon = "1.10.0"
roaring = { version = "0.10.6", features = ["serde"] } roaring = { version = "0.10.6", features = ["serde"] }
rstar = { version = "0.12.0", features = ["serde"] } rstar = { version = "0.12.0", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] } serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] } serde_json = { version = "1.0.120", features = ["preserve_order", "raw_value"] }
slice-group-by = "0.3.1" slice-group-by = "0.3.1"
smallstr = { version = "0.3.0", features = ["serde"] } smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.13.2" smallvec = "1.13.2"

View File

@ -339,121 +339,40 @@ fn merge_document_for_updates(
} }
} }
/* use std::borrow::Borrow;
use std::{
borrow::{Borrow, Cow},
collections::BTreeMap,
ops::Deref,
};
use serde::Deserialize; use serde::Deserialize;
use serde_json::{value::RawValue, Value}; use serde_json::from_str;
/* use serde_json::value::RawValue;
#[repr(transparent)]
pub struct my_str(str);
impl ToOwned for my_str { #[derive(Deserialize)]
type Owned = Box<str>; pub struct TopLevelMap<'p>(#[serde(borrow)] BTreeMap<CowStr<'p>, &'p RawValue>);
fn to_owned(&self) -> Self::Owned {
self.0.to_string().into_boxed_str()
}
}
impl Borrow<my_str> for Box<str> {
fn borrow(&self) -> &my_str {
unsafe { std::mem::transmute(self.as_ref()) }
}
}
*/
#[derive(Deserialize, PartialEq, Eq, PartialOrd, Ord)] #[derive(Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub struct CowKey<'doc>(#[serde(borrow)] Cow<'doc, str>); pub struct CowStr<'p>(#[serde(borrow)] Cow<'p, str>);
impl<'doc> Borrow<str> for CowKey<'doc> { impl<'doc> Borrow<str> for CowStr<'doc> {
fn borrow(&self) -> &str { fn borrow(&self) -> &str {
self.0.borrow() self.0.borrow()
} }
} }
#[derive(Deserialize)] fn get_docid<'p>(
pub struct TopLevelMap<'doc>(#[serde(borrow)] BTreeMap<CowKey<'doc>, &'doc RawValue>); map: &TopLevelMap<'p>,
#[derive(Deserialize)]
pub struct FlatDocs<'doc>(#[serde(borrow)] Vec<&'doc RawValue>);
fn read_docs<'doc>(
ndjson: &'doc str,
) -> impl Iterator<Item = Result<TopLevelMap<'doc>, serde_json::Error>> {
serde_json::Deserializer::from_str(ndjson).into_iter::<TopLevelMap>()
}
fn main() {
let ndjson_data = r#"
{
"id": {
"nested": "kefir"
},
"name": "Alice",
"age": 30
}
{
"id": {
"nested": "intel"
},
"name\n": "Bob",
"age": 22
}
"#;
let primary_key: Vec<_> = "id.nested".split('.').collect(); // dynamic
for doc in read_docs(ndjson_data) {
let doc = doc.unwrap();
let docid = get_docid(&doc, &primary_key).unwrap().expect("missingno");
println!("docid={docid}");
}
}
pub struct Document<'payload> {
fields: TopLevelMap<'payload>,
docid: String,
}
/*impl<'payload> Document<'payload> {
pub fn get(name: &str) -> Option<&'payload RawValue> {}
pub fn get_nested(name: &[&str]) {}
}*/
fn get_docid<'payload>(
map: &TopLevelMap<'payload>,
primary_key: &[&str], primary_key: &[&str],
) -> serde_json::Result<Option<CowKey<'payload>>> { ) -> serde_json::Result<Option<CowStr<'p>>> {
match primary_key { match primary_key {
[] => unreachable!("arrrgh"), [] => unreachable!("arrrgh"), // would None be ok?
[primary_key] => match map.0.get(*primary_key) { [primary_key] => match map.0.get(*primary_key) {
Some(value) => { Some(value) => match from_str::<u64>(value.get()) {
let value = value.get(); Ok(value) => Ok(Some(CowStr(Cow::Owned(value.to_string())))),
let value_number: Result<u64, _> = serde_json::from_str(value); Err(_) => Ok(Some(from_str(value.get())?)),
Ok(Some(match value_number { },
Ok(value) => CowKey(Cow::Owned(value.to_string())),
Err(_) => serde_json::from_str(value)?,
}))
}
None => Ok(None), None => Ok(None),
}, },
[head, tail @ ..] => match map.0.get(*head) { [head, tail @ ..] => match map.0.get(*head) {
Some(value) => { Some(value) => get_docid(&from_str(value.get())?, tail),
let map = serde_json::from_str(value.get())?;
get_docid(&map, tail)
}
None => Ok(None), None => Ok(None),
}, },
} }
} }
*/