From 2e62925a6e5f6ee864410322758b32c39a8f1a38 Mon Sep 17 00:00:00 2001 From: marin postma Date: Sun, 24 Oct 2021 14:41:36 +0200 Subject: [PATCH] fix tests --- milli/src/documents/builder.rs | 51 +++++++++---------------- milli/src/documents/mod.rs | 28 ++++++++------ milli/src/documents/serde.rs | 1 + milli/src/index.rs | 1 + milli/src/search/distinct/mod.rs | 5 ++- milli/src/update/index_documents/mod.rs | 4 +- milli/tests/search/mod.rs | 10 +++-- milli/tests/search/query_criteria.rs | 4 +- 8 files changed, 49 insertions(+), 55 deletions(-) diff --git a/milli/src/documents/builder.rs b/milli/src/documents/builder.rs index 719580b4a..8c70910b5 100644 --- a/milli/src/documents/builder.rs +++ b/milli/src/documents/builder.rs @@ -1,5 +1,4 @@ use std::collections::BTreeMap; -use std::collections::HashMap; use std::io; use std::io::Cursor; use std::io::Write; @@ -18,18 +17,6 @@ use super::{ByteCounter, DocumentsBatchIndex, DocumentsMetadata, Error}; /// /// The writer used by the DocumentBatchBuilder can be read using a `DocumentBatchReader` to /// iterate other the documents. -/// -/// ## example: -/// ``` -/// use milli::documents::DocumentBatchBuilder; -/// use serde_json::json; -/// use std::io::Cursor; -/// -/// let mut writer = Cursor::new(Vec::new()); -/// let mut builder = DocumentBatchBuilder::new(&mut writer).unwrap(); -/// builder.add_documents(json!({"id": 1, "name": "foo"})).unwrap(); -/// builder.finish().unwrap(); -/// ``` pub struct DocumentBatchBuilder { inner: ByteCounter, index: DocumentsBatchIndex, @@ -100,7 +87,7 @@ impl DocumentBatchBuilder { count: &mut self.count, }; - de.deserialize_any(&mut visitor).unwrap(); + de.deserialize_any(&mut visitor).map_err(Error::JsonError)?; Ok(()) } @@ -112,10 +99,11 @@ impl DocumentBatchBuilder { /// optimizations. /// /// From csv takes care to call finish in the end. - pub fn from_csv(mut self, reader: R) -> Result<(), Error> { + pub fn from_csv(reader: R, writer: W) -> Result { + let mut this = Self::new(writer)?; // Ensure that this is the first and only addition made with this builder - debug_assert!(self.index.is_empty()); + debug_assert!(this.index.is_empty()); let mut records = csv::Reader::from_reader(reader); @@ -124,40 +112,37 @@ impl DocumentBatchBuilder { .unwrap() .into_iter() .map(parse_csv_header) - .map(|(k, t)| (self.index.insert(&k), t)) - .collect::>(); + .map(|(k, t)| (this.index.insert(&k), t)) + .collect::>(); let records = records.into_records(); - dbg!(&headers); for record in records { match record { Ok(record) => { - let mut writer = obkv::KvWriter::new(Cursor::new(&mut self.obkv_buffer)); + let mut writer = obkv::KvWriter::new(Cursor::new(&mut this.obkv_buffer)); for (value, (fid, ty)) in record.into_iter().zip(headers.iter()) { let value = match ty { AllowedType::Number => value.parse::().map(Value::from).unwrap(), AllowedType::String => Value::String(value.to_string()), }; - serde_json::to_writer(Cursor::new(&mut self.value_buffer), dbg!(&value)).unwrap(); - writer.insert(*fid, &self.value_buffer)?; - self.value_buffer.clear(); + serde_json::to_writer(Cursor::new(&mut this.value_buffer), &value).unwrap(); + writer.insert(*fid, &this.value_buffer)?; + this.value_buffer.clear(); } - self.inner.write_u32::(self.obkv_buffer.len() as u32)?; - self.inner.write_all(&self.obkv_buffer)?; + this.inner.write_u32::(this.obkv_buffer.len() as u32)?; + this.inner.write_all(&this.obkv_buffer)?; - self.obkv_buffer.clear(); - self.count += 1; + this.obkv_buffer.clear(); + this.count += 1; }, Err(_) => panic!(), } } - self.finish()?; - - Ok(()) + Ok(this) } } @@ -265,18 +250,16 @@ mod test { #[test] fn add_documents_csv() { let mut cursor = Cursor::new(Vec::new()); - let builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); let csv = "id:number,field:string\n1,hello!\n2,blabla"; - builder.from_csv(Cursor::new(csv.as_bytes())).unwrap(); + let builder = DocumentBatchBuilder::from_csv(Cursor::new(csv.as_bytes()), &mut cursor).unwrap(); + builder.finish().unwrap(); cursor.set_position(0); let mut reader = DocumentBatchReader::from_reader(cursor).unwrap(); - dbg!(reader.len()); - let (index, document) = reader.next_document_with_index().unwrap().unwrap(); assert_eq!(index.len(), 2); assert_eq!(document.iter().count(), 2); diff --git a/milli/src/documents/mod.rs b/milli/src/documents/mod.rs index 9f6ebd3de..8a8b87794 100644 --- a/milli/src/documents/mod.rs +++ b/milli/src/documents/mod.rs @@ -135,6 +135,8 @@ macro_rules! documents { #[cfg(test)] mod test { + use std::io::Cursor; + use serde_json::{json, Value}; use super::*; @@ -151,13 +153,14 @@ mod test { "bool": true }); + let json = serde_json::to_vec(&json).unwrap(); + let mut v = Vec::new(); let mut cursor = io::Cursor::new(&mut v); let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); - todo!(); - //builder.add_documents(json).unwrap(); + builder.extend_from_json(Cursor::new(json)).unwrap(); builder.finish().unwrap(); @@ -181,14 +184,16 @@ mod test { "toto": false, }); + let doc1 = serde_json::to_vec(&doc1).unwrap(); + let doc2 = serde_json::to_vec(&doc2).unwrap(); + let mut v = Vec::new(); let mut cursor = io::Cursor::new(&mut v); let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); - todo!(); - //builder.add_documents(doc1).unwrap(); - //builder.add_documents(doc2).unwrap(); + builder.extend_from_json(Cursor::new(doc1)).unwrap(); + builder.extend_from_json(Cursor::new(doc2)).unwrap(); builder.finish().unwrap(); @@ -211,13 +216,14 @@ mod test { { "tata": "hello" }, ]); + let docs = serde_json::to_vec(&docs).unwrap(); + let mut v = Vec::new(); let mut cursor = io::Cursor::new(&mut v); let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); - todo!(); - //builder.add_documents(docs).unwrap(); + builder.extend_from_json(Cursor::new(docs)).unwrap(); builder.finish().unwrap(); @@ -245,13 +251,13 @@ mod test { { "tata": "hello" }, ]]); - todo!(); - //assert!(builder.add_documents(docs).is_err()); + let docs = serde_json::to_vec(&docs).unwrap(); + assert!(builder.extend_from_json(Cursor::new(docs)).is_err()); let docs = json!("hello"); + let docs = serde_json::to_vec(&docs).unwrap(); - todo!(); - //assert!(builder.add_documents(docs).is_err()); + assert!(builder.extend_from_json(Cursor::new(docs)).is_err()); } #[test] diff --git a/milli/src/documents/serde.rs b/milli/src/documents/serde.rs index 86fb68534..2466ed373 100644 --- a/milli/src/documents/serde.rs +++ b/milli/src/documents/serde.rs @@ -98,6 +98,7 @@ impl<'a, 'de, W: Write> Visitor<'de> for &mut DocumentVisitor<'a, W> { self.inner.write_all(reader).unwrap(); *self.count += 1; + self.values.clear(); Ok(()) } diff --git a/milli/src/index.rs b/milli/src/index.rs index 6ce693fbe..fe89fe734 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -953,6 +953,7 @@ pub(crate) mod tests { { "id": 1, "name": "kevin", "has_dog": true }, { "id": 2, "name": "bob" } ]); + let mut wtxn = index.write_txn().unwrap(); let builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.execute(content, |_, _| ()).unwrap(); diff --git a/milli/src/search/distinct/mod.rs b/milli/src/search/distinct/mod.rs index 5639c53fa..11f6379e3 100644 --- a/milli/src/search/distinct/mod.rs +++ b/milli/src/search/distinct/mod.rs @@ -68,8 +68,9 @@ mod test { "txts": sample_txts[..(rng.gen_range(0..3))], "cat-ints": sample_ints[..(rng.gen_range(0..3))], }); - todo!() - //builder.add_documents(doc).unwrap(); + + let doc = Cursor::new(serde_json::to_vec(&doc).unwrap()); + builder.extend_from_json(doc).unwrap(); } builder.finish().unwrap(); diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 58e21a615..17c778060 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -877,8 +877,8 @@ mod tests { let mut cursor = Cursor::new(Vec::new()); let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); - todo!(); - //builder.add_documents(big_object).unwrap(); + let big_object = Cursor::new(serde_json::to_vec(&big_object).unwrap()); + builder.extend_from_json(big_object).unwrap(); builder.finish().unwrap(); cursor.set_position(0); let content = DocumentBatchReader::from_reader(cursor).unwrap(); diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs index d62b8ec31..e8fb3fdfa 100644 --- a/milli/tests/search/mod.rs +++ b/milli/tests/search/mod.rs @@ -61,10 +61,12 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let mut cursor = Cursor::new(Vec::new()); let mut documents_builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); let reader = Cursor::new(CONTENT.as_bytes()); - todo!(); - //for doc in serde_json::Deserializer::from_reader(reader).into_iter::() { - //documents_builder.add_documents(doc.unwrap()).unwrap(); - //} + + for doc in serde_json::Deserializer::from_reader(reader).into_iter::() { + let doc = Cursor::new(serde_json::to_vec(&doc.unwrap()).unwrap()); + documents_builder.extend_from_json(doc).unwrap(); + } + documents_builder.finish().unwrap(); cursor.set_position(0); diff --git a/milli/tests/search/query_criteria.rs b/milli/tests/search/query_criteria.rs index 3fb36b1d5..e5dde049c 100644 --- a/milli/tests/search/query_criteria.rs +++ b/milli/tests/search/query_criteria.rs @@ -409,8 +409,8 @@ fn criteria_ascdesc() { "age": age, }); - todo!(); - //batch_builder.add_documents(json).unwrap(); + let json = Cursor::new(serde_json::to_vec(&json).unwrap()); + batch_builder.extend_from_json(json).unwrap(); }); batch_builder.finish().unwrap();