mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Fix the tests for the new DocumentsBatchBuilder/Reader
This commit is contained in:
parent
419ce3966c
commit
e8297ad27e
@ -183,7 +183,8 @@ mod test {
|
|||||||
use serde_json::{json, Map};
|
use serde_json::{json, Map};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::documents::DocumentBatchReader;
|
use crate::documents::DocumentsBatchReader;
|
||||||
|
use crate::FieldId;
|
||||||
|
|
||||||
fn obkv_to_value(obkv: &obkv::KvReader<FieldId>, index: &DocumentsBatchIndex) -> Value {
|
fn obkv_to_value(obkv: &obkv::KvReader<FieldId>, index: &DocumentsBatchIndex) -> Value {
|
||||||
let mut map = Map::new();
|
let mut map = Map::new();
|
||||||
@ -192,7 +193,7 @@ mod test {
|
|||||||
let field_name = index.name(fid).unwrap().clone();
|
let field_name = index.name(fid).unwrap().clone();
|
||||||
let value: Value = serde_json::from_slice(value).unwrap();
|
let value: Value = serde_json::from_slice(value).unwrap();
|
||||||
|
|
||||||
map.insert(field_name, value);
|
map.insert(field_name.to_string(), value);
|
||||||
}
|
}
|
||||||
|
|
||||||
Value::Object(map)
|
Value::Object(map)
|
||||||
@ -200,15 +201,13 @@ mod test {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn add_single_documents_json() {
|
fn add_single_documents_json() {
|
||||||
let mut cursor = Cursor::new(Vec::new());
|
|
||||||
let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
|
||||||
|
|
||||||
let json = serde_json::json!({
|
let json = serde_json::json!({
|
||||||
"id": 1,
|
"id": 1,
|
||||||
"field": "hello!",
|
"field": "hello!",
|
||||||
});
|
});
|
||||||
|
|
||||||
builder.extend_from_json(Cursor::new(serde_json::to_vec(&json).unwrap())).unwrap();
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
|
builder.append_json_object(json.as_object().unwrap()).unwrap();
|
||||||
|
|
||||||
let json = serde_json::json!({
|
let json = serde_json::json!({
|
||||||
"blabla": false,
|
"blabla": false,
|
||||||
@ -216,100 +215,64 @@ mod test {
|
|||||||
"id": 1,
|
"id": 1,
|
||||||
});
|
});
|
||||||
|
|
||||||
builder.extend_from_json(Cursor::new(serde_json::to_vec(&json).unwrap())).unwrap();
|
builder.append_json_object(json.as_object().unwrap()).unwrap();
|
||||||
|
|
||||||
assert_eq!(builder.len(), 2);
|
assert_eq!(builder.documents_count(), 2);
|
||||||
|
let vector = builder.into_inner().unwrap();
|
||||||
|
|
||||||
builder.finish().unwrap();
|
let mut cursor =
|
||||||
|
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap().into_cursor();
|
||||||
cursor.set_position(0);
|
let index = cursor.documents_batch_index().clone();
|
||||||
|
|
||||||
let mut reader = DocumentBatchReader::from_reader(cursor).unwrap();
|
|
||||||
|
|
||||||
let (index, document) = reader.next_document_with_index().unwrap().unwrap();
|
|
||||||
assert_eq!(index.len(), 3);
|
assert_eq!(index.len(), 3);
|
||||||
|
|
||||||
|
let document = cursor.next_document().unwrap().unwrap();
|
||||||
assert_eq!(document.iter().count(), 2);
|
assert_eq!(document.iter().count(), 2);
|
||||||
|
|
||||||
let (index, document) = reader.next_document_with_index().unwrap().unwrap();
|
let document = cursor.next_document().unwrap().unwrap();
|
||||||
assert_eq!(index.len(), 3);
|
|
||||||
assert_eq!(document.iter().count(), 3);
|
assert_eq!(document.iter().count(), 3);
|
||||||
|
|
||||||
assert!(reader.next_document_with_index().unwrap().is_none());
|
assert!(cursor.next_document().unwrap().is_none());
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn add_documents_seq_json() {
|
|
||||||
let mut cursor = Cursor::new(Vec::new());
|
|
||||||
let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
|
||||||
|
|
||||||
let json = serde_json::json!([{
|
|
||||||
"id": 1,
|
|
||||||
"field": "hello!",
|
|
||||||
},{
|
|
||||||
"blabla": false,
|
|
||||||
"field": "hello!",
|
|
||||||
"id": 1,
|
|
||||||
}
|
|
||||||
]);
|
|
||||||
|
|
||||||
builder.extend_from_json(Cursor::new(serde_json::to_vec(&json).unwrap())).unwrap();
|
|
||||||
|
|
||||||
assert_eq!(builder.len(), 2);
|
|
||||||
|
|
||||||
builder.finish().unwrap();
|
|
||||||
|
|
||||||
cursor.set_position(0);
|
|
||||||
|
|
||||||
let mut reader = DocumentBatchReader::from_reader(cursor).unwrap();
|
|
||||||
|
|
||||||
let (index, document) = reader.next_document_with_index().unwrap().unwrap();
|
|
||||||
assert_eq!(index.len(), 3);
|
|
||||||
assert_eq!(document.iter().count(), 2);
|
|
||||||
|
|
||||||
let (index, document) = reader.next_document_with_index().unwrap().unwrap();
|
|
||||||
assert_eq!(index.len(), 3);
|
|
||||||
assert_eq!(document.iter().count(), 3);
|
|
||||||
|
|
||||||
assert!(reader.next_document_with_index().unwrap().is_none());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn add_documents_csv() {
|
fn add_documents_csv() {
|
||||||
let mut cursor = Cursor::new(Vec::new());
|
let csv_content = "id:number,field:string\n1,hello!\n2,blabla";
|
||||||
|
let csv = csv::Reader::from_reader(Cursor::new(csv_content));
|
||||||
|
|
||||||
let csv = "id:number,field:string\n1,hello!\n2,blabla";
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
|
builder.append_csv(csv).unwrap();
|
||||||
|
assert_eq!(builder.documents_count(), 2);
|
||||||
|
let vector = builder.into_inner().unwrap();
|
||||||
|
|
||||||
let builder =
|
let mut cursor =
|
||||||
DocumentBatchBuilder::from_csv(Cursor::new(csv.as_bytes()), &mut cursor).unwrap();
|
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap().into_cursor();
|
||||||
builder.finish().unwrap();
|
let index = cursor.documents_batch_index().clone();
|
||||||
|
|
||||||
cursor.set_position(0);
|
|
||||||
|
|
||||||
let mut reader = DocumentBatchReader::from_reader(cursor).unwrap();
|
|
||||||
|
|
||||||
let (index, document) = reader.next_document_with_index().unwrap().unwrap();
|
|
||||||
assert_eq!(index.len(), 2);
|
assert_eq!(index.len(), 2);
|
||||||
|
|
||||||
|
let document = cursor.next_document().unwrap().unwrap();
|
||||||
assert_eq!(document.iter().count(), 2);
|
assert_eq!(document.iter().count(), 2);
|
||||||
|
|
||||||
let (_index, document) = reader.next_document_with_index().unwrap().unwrap();
|
let document = cursor.next_document().unwrap().unwrap();
|
||||||
assert_eq!(document.iter().count(), 2);
|
assert_eq!(document.iter().count(), 2);
|
||||||
|
|
||||||
assert!(reader.next_document_with_index().unwrap().is_none());
|
assert!(cursor.next_document().unwrap().is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn simple_csv_document() {
|
fn simple_csv_document() {
|
||||||
let documents = r#"city,country,pop
|
let csv_content = r#"city,country,pop
|
||||||
"Boston","United States","4628910""#;
|
"Boston","United States","4628910""#;
|
||||||
|
let csv = csv::Reader::from_reader(Cursor::new(csv_content));
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
DocumentBatchBuilder::from_csv(documents.as_bytes(), Cursor::new(&mut buf))
|
builder.append_csv(csv).unwrap();
|
||||||
.unwrap()
|
let vector = builder.into_inner().unwrap();
|
||||||
.finish()
|
|
||||||
.unwrap();
|
let mut cursor =
|
||||||
let mut reader = DocumentBatchReader::from_reader(Cursor::new(buf)).unwrap();
|
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap().into_cursor();
|
||||||
let (index, doc) = reader.next_document_with_index().unwrap().unwrap();
|
let index = cursor.documents_batch_index().clone();
|
||||||
let val = obkv_to_value(&doc, index);
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
|
let val = obkv_to_value(&doc, &index);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -320,22 +283,25 @@ mod test {
|
|||||||
})
|
})
|
||||||
);
|
);
|
||||||
|
|
||||||
assert!(reader.next_document_with_index().unwrap().is_none());
|
assert!(cursor.next_document().unwrap().is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn coma_in_field() {
|
fn coma_in_field() {
|
||||||
let documents = r#"city,country,pop
|
let csv_content = r#"city,country,pop
|
||||||
"Boston","United, States","4628910""#;
|
"Boston","United, States","4628910""#;
|
||||||
|
let csv = csv::Reader::from_reader(Cursor::new(csv_content));
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
DocumentBatchBuilder::from_csv(documents.as_bytes(), Cursor::new(&mut buf))
|
builder.append_csv(csv).unwrap();
|
||||||
.unwrap()
|
let vector = builder.into_inner().unwrap();
|
||||||
.finish()
|
|
||||||
.unwrap();
|
let mut cursor =
|
||||||
let mut reader = DocumentBatchReader::from_reader(Cursor::new(buf)).unwrap();
|
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap().into_cursor();
|
||||||
let (index, doc) = reader.next_document_with_index().unwrap().unwrap();
|
let index = cursor.documents_batch_index().clone();
|
||||||
let val = obkv_to_value(&doc, index);
|
|
||||||
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
|
let val = obkv_to_value(&doc, &index);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -349,17 +315,20 @@ mod test {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn quote_in_field() {
|
fn quote_in_field() {
|
||||||
let documents = r#"city,country,pop
|
let csv_content = r#"city,country,pop
|
||||||
"Boston","United"" States","4628910""#;
|
"Boston","United"" States","4628910""#;
|
||||||
|
let csv = csv::Reader::from_reader(Cursor::new(csv_content));
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
DocumentBatchBuilder::from_csv(documents.as_bytes(), Cursor::new(&mut buf))
|
builder.append_csv(csv).unwrap();
|
||||||
.unwrap()
|
let vector = builder.into_inner().unwrap();
|
||||||
.finish()
|
|
||||||
.unwrap();
|
let mut cursor =
|
||||||
let mut reader = DocumentBatchReader::from_reader(Cursor::new(buf)).unwrap();
|
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap().into_cursor();
|
||||||
let (index, doc) = reader.next_document_with_index().unwrap().unwrap();
|
let index = cursor.documents_batch_index().clone();
|
||||||
let val = obkv_to_value(&doc, index);
|
|
||||||
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
|
let val = obkv_to_value(&doc, &index);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -373,17 +342,20 @@ mod test {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn integer_in_field() {
|
fn integer_in_field() {
|
||||||
let documents = r#"city,country,pop:number
|
let csv_content = r#"city,country,pop:number
|
||||||
"Boston","United States","4628910""#;
|
"Boston","United States","4628910""#;
|
||||||
|
let csv = csv::Reader::from_reader(Cursor::new(csv_content));
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
DocumentBatchBuilder::from_csv(documents.as_bytes(), Cursor::new(&mut buf))
|
builder.append_csv(csv).unwrap();
|
||||||
.unwrap()
|
let vector = builder.into_inner().unwrap();
|
||||||
.finish()
|
|
||||||
.unwrap();
|
let mut cursor =
|
||||||
let mut reader = DocumentBatchReader::from_reader(Cursor::new(buf)).unwrap();
|
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap().into_cursor();
|
||||||
let (index, doc) = reader.next_document_with_index().unwrap().unwrap();
|
let index = cursor.documents_batch_index().clone();
|
||||||
let val = obkv_to_value(&doc, index);
|
|
||||||
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
|
let val = obkv_to_value(&doc, &index);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -397,17 +369,20 @@ mod test {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn float_in_field() {
|
fn float_in_field() {
|
||||||
let documents = r#"city,country,pop:number
|
let csv_content = r#"city,country,pop:number
|
||||||
"Boston","United States","4628910.01""#;
|
"Boston","United States","4628910.01""#;
|
||||||
|
let csv = csv::Reader::from_reader(Cursor::new(csv_content));
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
DocumentBatchBuilder::from_csv(documents.as_bytes(), Cursor::new(&mut buf))
|
builder.append_csv(csv).unwrap();
|
||||||
.unwrap()
|
let vector = builder.into_inner().unwrap();
|
||||||
.finish()
|
|
||||||
.unwrap();
|
let mut cursor =
|
||||||
let mut reader = DocumentBatchReader::from_reader(Cursor::new(buf)).unwrap();
|
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap().into_cursor();
|
||||||
let (index, doc) = reader.next_document_with_index().unwrap().unwrap();
|
let index = cursor.documents_batch_index().clone();
|
||||||
let val = obkv_to_value(&doc, index);
|
|
||||||
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
|
let val = obkv_to_value(&doc, &index);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -421,17 +396,20 @@ mod test {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn several_colon_in_header() {
|
fn several_colon_in_header() {
|
||||||
let documents = r#"city:love:string,country:state,pop
|
let csv_content = r#"city:love:string,country:state,pop
|
||||||
"Boston","United States","4628910""#;
|
"Boston","United States","4628910""#;
|
||||||
|
let csv = csv::Reader::from_reader(Cursor::new(csv_content));
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
DocumentBatchBuilder::from_csv(documents.as_bytes(), Cursor::new(&mut buf))
|
builder.append_csv(csv).unwrap();
|
||||||
.unwrap()
|
let vector = builder.into_inner().unwrap();
|
||||||
.finish()
|
|
||||||
.unwrap();
|
let mut cursor =
|
||||||
let mut reader = DocumentBatchReader::from_reader(Cursor::new(buf)).unwrap();
|
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap().into_cursor();
|
||||||
let (index, doc) = reader.next_document_with_index().unwrap().unwrap();
|
let index = cursor.documents_batch_index().clone();
|
||||||
let val = obkv_to_value(&doc, index);
|
|
||||||
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
|
let val = obkv_to_value(&doc, &index);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -445,17 +423,20 @@ mod test {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn ending_by_colon_in_header() {
|
fn ending_by_colon_in_header() {
|
||||||
let documents = r#"city:,country,pop
|
let csv_content = r#"city:,country,pop
|
||||||
"Boston","United States","4628910""#;
|
"Boston","United States","4628910""#;
|
||||||
|
let csv = csv::Reader::from_reader(Cursor::new(csv_content));
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
DocumentBatchBuilder::from_csv(documents.as_bytes(), Cursor::new(&mut buf))
|
builder.append_csv(csv).unwrap();
|
||||||
.unwrap()
|
let vector = builder.into_inner().unwrap();
|
||||||
.finish()
|
|
||||||
.unwrap();
|
let mut cursor =
|
||||||
let mut reader = DocumentBatchReader::from_reader(Cursor::new(buf)).unwrap();
|
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap().into_cursor();
|
||||||
let (index, doc) = reader.next_document_with_index().unwrap().unwrap();
|
let index = cursor.documents_batch_index().clone();
|
||||||
let val = obkv_to_value(&doc, index);
|
|
||||||
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
|
let val = obkv_to_value(&doc, &index);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -469,17 +450,20 @@ mod test {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn starting_by_colon_in_header() {
|
fn starting_by_colon_in_header() {
|
||||||
let documents = r#":city,country,pop
|
let csv_content = r#":city,country,pop
|
||||||
"Boston","United States","4628910""#;
|
"Boston","United States","4628910""#;
|
||||||
|
let csv = csv::Reader::from_reader(Cursor::new(csv_content));
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
DocumentBatchBuilder::from_csv(documents.as_bytes(), Cursor::new(&mut buf))
|
builder.append_csv(csv).unwrap();
|
||||||
.unwrap()
|
let vector = builder.into_inner().unwrap();
|
||||||
.finish()
|
|
||||||
.unwrap();
|
let mut cursor =
|
||||||
let mut reader = DocumentBatchReader::from_reader(Cursor::new(buf)).unwrap();
|
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap().into_cursor();
|
||||||
let (index, doc) = reader.next_document_with_index().unwrap().unwrap();
|
let index = cursor.documents_batch_index().clone();
|
||||||
let val = obkv_to_value(&doc, index);
|
|
||||||
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
|
let val = obkv_to_value(&doc, &index);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -494,32 +478,36 @@ mod test {
|
|||||||
#[ignore]
|
#[ignore]
|
||||||
#[test]
|
#[test]
|
||||||
fn starting_by_colon_in_header2() {
|
fn starting_by_colon_in_header2() {
|
||||||
let documents = r#":string,country,pop
|
let csv_content = r#":string,country,pop
|
||||||
"Boston","United States","4628910""#;
|
"Boston","United States","4628910""#;
|
||||||
|
let csv = csv::Reader::from_reader(Cursor::new(csv_content));
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
DocumentBatchBuilder::from_csv(documents.as_bytes(), Cursor::new(&mut buf))
|
builder.append_csv(csv).unwrap();
|
||||||
.unwrap()
|
let vector = builder.into_inner().unwrap();
|
||||||
.finish()
|
|
||||||
.unwrap();
|
|
||||||
let mut reader = DocumentBatchReader::from_reader(Cursor::new(buf)).unwrap();
|
|
||||||
|
|
||||||
assert!(reader.next_document_with_index().is_err());
|
let mut cursor =
|
||||||
|
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap().into_cursor();
|
||||||
|
|
||||||
|
assert!(cursor.next_document().is_err());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn double_colon_in_header() {
|
fn double_colon_in_header() {
|
||||||
let documents = r#"city::string,country,pop
|
let csv_content = r#"city::string,country,pop
|
||||||
"Boston","United States","4628910""#;
|
"Boston","United States","4628910""#;
|
||||||
|
let csv = csv::Reader::from_reader(Cursor::new(csv_content));
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
DocumentBatchBuilder::from_csv(documents.as_bytes(), Cursor::new(&mut buf))
|
builder.append_csv(csv).unwrap();
|
||||||
.unwrap()
|
let vector = builder.into_inner().unwrap();
|
||||||
.finish()
|
|
||||||
.unwrap();
|
let mut cursor =
|
||||||
let mut reader = DocumentBatchReader::from_reader(Cursor::new(buf)).unwrap();
|
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap().into_cursor();
|
||||||
let (index, doc) = reader.next_document_with_index().unwrap().unwrap();
|
let index = cursor.documents_batch_index().clone();
|
||||||
let val = obkv_to_value(&doc, index);
|
|
||||||
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
|
let val = obkv_to_value(&doc, &index);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
val,
|
val,
|
||||||
@ -533,34 +521,32 @@ mod test {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn bad_type_in_header() {
|
fn bad_type_in_header() {
|
||||||
let documents = r#"city,country:number,pop
|
let csv_content = r#"city,country:number,pop
|
||||||
"Boston","United States","4628910""#;
|
"Boston","United States","4628910""#;
|
||||||
|
let csv = csv::Reader::from_reader(Cursor::new(csv_content));
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
assert!(
|
assert!(builder.append_csv(csv).is_err());
|
||||||
DocumentBatchBuilder::from_csv(documents.as_bytes(), Cursor::new(&mut buf)).is_err()
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn bad_column_count1() {
|
fn bad_column_count1() {
|
||||||
let documents = r#"city,country,pop
|
let csv_content = r#"city,country,pop
|
||||||
"Boston","United States","4628910", "too much""#;
|
"Boston","United States","4628910", "too much""#;
|
||||||
|
let csv = csv::Reader::from_reader(Cursor::new(csv_content));
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
assert!(
|
assert!(builder.append_csv(csv).is_err());
|
||||||
DocumentBatchBuilder::from_csv(documents.as_bytes(), Cursor::new(&mut buf)).is_err()
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn bad_column_count2() {
|
fn bad_column_count2() {
|
||||||
let documents = r#"city,country,pop
|
let csv_content = r#"city,country,pop
|
||||||
"Boston","United States""#;
|
"Boston","United States""#;
|
||||||
|
let csv = csv::Reader::from_reader(Cursor::new(csv_content));
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
assert!(
|
assert!(builder.append_csv(csv).is_err());
|
||||||
DocumentBatchBuilder::from_csv(documents.as_bytes(), Cursor::new(&mut buf)).is_err()
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -159,7 +159,7 @@ mod test {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn create_documents_no_errors() {
|
fn create_documents_no_errors() {
|
||||||
let json = json!({
|
let value = json!({
|
||||||
"number": 1,
|
"number": 1,
|
||||||
"string": "this is a field",
|
"string": "this is a field",
|
||||||
"array": ["an", "array"],
|
"array": ["an", "array"],
|
||||||
@ -169,26 +169,17 @@ mod test {
|
|||||||
"bool": true
|
"bool": true
|
||||||
});
|
});
|
||||||
|
|
||||||
let json = serde_json::to_vec(&json).unwrap();
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
|
builder.append_json_object(value.as_object().unwrap()).unwrap();
|
||||||
let mut v = Vec::new();
|
let vector = builder.into_inner().unwrap();
|
||||||
let mut cursor = io::Cursor::new(&mut v);
|
|
||||||
|
|
||||||
let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
|
||||||
|
|
||||||
builder.extend_from_json(Cursor::new(json)).unwrap();
|
|
||||||
|
|
||||||
builder.finish().unwrap();
|
|
||||||
|
|
||||||
let mut documents =
|
let mut documents =
|
||||||
DocumentBatchReader::from_reader(io::Cursor::new(cursor.into_inner())).unwrap();
|
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap().into_cursor();
|
||||||
|
|
||||||
assert_eq!(documents.index().iter().count(), 5);
|
assert_eq!(documents.documents_batch_index().iter().count(), 5);
|
||||||
|
let reader = documents.next_document().unwrap().unwrap();
|
||||||
let reader = documents.next_document_with_index().unwrap().unwrap();
|
assert_eq!(reader.iter().count(), 5);
|
||||||
|
assert!(documents.next_document().unwrap().is_none());
|
||||||
assert_eq!(reader.1.iter().count(), 5);
|
|
||||||
assert!(documents.next_document_with_index().unwrap().is_none());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -200,101 +191,55 @@ mod test {
|
|||||||
"toto": false,
|
"toto": false,
|
||||||
});
|
});
|
||||||
|
|
||||||
let doc1 = serde_json::to_vec(&doc1).unwrap();
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
let doc2 = serde_json::to_vec(&doc2).unwrap();
|
builder.append_json_object(doc1.as_object().unwrap()).unwrap();
|
||||||
|
builder.append_json_object(doc2.as_object().unwrap()).unwrap();
|
||||||
let mut v = Vec::new();
|
let vector = builder.into_inner().unwrap();
|
||||||
let mut cursor = io::Cursor::new(&mut v);
|
|
||||||
|
|
||||||
let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
|
||||||
|
|
||||||
builder.extend_from_json(Cursor::new(doc1)).unwrap();
|
|
||||||
builder.extend_from_json(Cursor::new(doc2)).unwrap();
|
|
||||||
|
|
||||||
builder.finish().unwrap();
|
|
||||||
|
|
||||||
let mut documents =
|
let mut documents =
|
||||||
DocumentBatchReader::from_reader(io::Cursor::new(cursor.into_inner())).unwrap();
|
DocumentsBatchReader::from_reader(io::Cursor::new(vector)).unwrap().into_cursor();
|
||||||
|
assert_eq!(documents.documents_batch_index().iter().count(), 2);
|
||||||
assert_eq!(documents.index().iter().count(), 2);
|
let reader = documents.next_document().unwrap().unwrap();
|
||||||
|
assert_eq!(reader.iter().count(), 1);
|
||||||
let reader = documents.next_document_with_index().unwrap().unwrap();
|
assert!(documents.next_document().unwrap().is_some());
|
||||||
|
assert!(documents.next_document().unwrap().is_none());
|
||||||
assert_eq!(reader.1.iter().count(), 1);
|
|
||||||
assert!(documents.next_document_with_index().unwrap().is_some());
|
|
||||||
assert!(documents.next_document_with_index().unwrap().is_none());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn add_documents_array() {
|
|
||||||
let docs = json!([
|
|
||||||
{ "toto": false },
|
|
||||||
{ "tata": "hello" },
|
|
||||||
]);
|
|
||||||
|
|
||||||
let docs = serde_json::to_vec(&docs).unwrap();
|
|
||||||
|
|
||||||
let mut v = Vec::new();
|
|
||||||
let mut cursor = io::Cursor::new(&mut v);
|
|
||||||
|
|
||||||
let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
|
||||||
|
|
||||||
builder.extend_from_json(Cursor::new(docs)).unwrap();
|
|
||||||
|
|
||||||
builder.finish().unwrap();
|
|
||||||
|
|
||||||
let mut documents =
|
|
||||||
DocumentBatchReader::from_reader(io::Cursor::new(cursor.into_inner())).unwrap();
|
|
||||||
|
|
||||||
assert_eq!(documents.index().iter().count(), 2);
|
|
||||||
|
|
||||||
let reader = documents.next_document_with_index().unwrap().unwrap();
|
|
||||||
|
|
||||||
assert_eq!(reader.1.iter().count(), 1);
|
|
||||||
assert!(documents.next_document_with_index().unwrap().is_some());
|
|
||||||
assert!(documents.next_document_with_index().unwrap().is_none());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn add_invalid_document_format() {
|
|
||||||
let mut v = Vec::new();
|
|
||||||
let mut cursor = io::Cursor::new(&mut v);
|
|
||||||
|
|
||||||
let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
|
||||||
|
|
||||||
let docs = json!([[
|
|
||||||
{ "toto": false },
|
|
||||||
{ "tata": "hello" },
|
|
||||||
]]);
|
|
||||||
|
|
||||||
let docs = serde_json::to_vec(&docs).unwrap();
|
|
||||||
assert!(builder.extend_from_json(Cursor::new(docs)).is_err());
|
|
||||||
|
|
||||||
let docs = json!("hello");
|
|
||||||
let docs = serde_json::to_vec(&docs).unwrap();
|
|
||||||
|
|
||||||
assert!(builder.extend_from_json(Cursor::new(docs)).is_err());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_nested() {
|
fn test_nested() {
|
||||||
let mut docs = documents!([{
|
let docs_reader = documents!([{
|
||||||
"hello": {
|
"hello": {
|
||||||
"toto": ["hello"]
|
"toto": ["hello"]
|
||||||
}
|
}
|
||||||
}]);
|
}]);
|
||||||
|
|
||||||
let (_index, doc) = docs.next_document_with_index().unwrap().unwrap();
|
let mut cursor = docs_reader.into_cursor();
|
||||||
|
let doc = cursor.next_document().unwrap().unwrap();
|
||||||
let nested: Value = serde_json::from_slice(doc.get(0).unwrap()).unwrap();
|
let nested: Value = serde_json::from_slice(doc.get(0).unwrap()).unwrap();
|
||||||
assert_eq!(nested, json!({ "toto": ["hello"] }));
|
assert_eq!(nested, json!({ "toto": ["hello"] }));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn out_of_order_fields() {
|
fn out_of_order_json_fields() {
|
||||||
let _documents = documents!([
|
let _documents = documents!([
|
||||||
{"id": 1,"b": 0},
|
{"id": 1,"b": 0},
|
||||||
{"id": 2,"a": 0,"b": 0},
|
{"id": 2,"a": 0,"b": 0},
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn out_of_order_csv_fields() {
|
||||||
|
let csv1_content = "id:number,b\n1,0";
|
||||||
|
let csv1 = csv::Reader::from_reader(Cursor::new(csv1_content));
|
||||||
|
|
||||||
|
let csv2_content = "id:number,a,b\n2,0,0";
|
||||||
|
let csv2 = csv::Reader::from_reader(Cursor::new(csv2_content));
|
||||||
|
|
||||||
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
|
builder.append_csv(csv1).unwrap();
|
||||||
|
builder.append_csv(csv2).unwrap();
|
||||||
|
let vector = builder.into_inner().unwrap();
|
||||||
|
|
||||||
|
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -35,7 +35,7 @@ mod test {
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde_json::{json, Value};
|
use serde_json::{json, Value};
|
||||||
|
|
||||||
use crate::documents::{DocumentBatchBuilder, DocumentBatchReader};
|
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||||
use crate::index::tests::TempIndex;
|
use crate::index::tests::TempIndex;
|
||||||
use crate::index::Index;
|
use crate::index::Index;
|
||||||
use crate::update::{
|
use crate::update::{
|
||||||
@ -43,14 +43,11 @@ mod test {
|
|||||||
};
|
};
|
||||||
use crate::{DocumentId, FieldId, BEU32};
|
use crate::{DocumentId, FieldId, BEU32};
|
||||||
|
|
||||||
static JSON: Lazy<Vec<u8>> = Lazy::new(generate_documents);
|
static JSON: Lazy<Vec<u8>> = Lazy::new(|| {
|
||||||
|
|
||||||
fn generate_documents() -> Vec<u8> {
|
|
||||||
let mut rng = rand::thread_rng();
|
let mut rng = rand::thread_rng();
|
||||||
let num_docs = rng.gen_range(10..30);
|
let num_docs = rng.gen_range(10..30);
|
||||||
|
|
||||||
let mut cursor = Cursor::new(Vec::new());
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
|
||||||
let txts = ["Toto", "Titi", "Tata"];
|
let txts = ["Toto", "Titi", "Tata"];
|
||||||
let cats = (1..10).map(|i| i.to_string()).collect::<Vec<_>>();
|
let cats = (1..10).map(|i| i.to_string()).collect::<Vec<_>>();
|
||||||
let cat_ints = (1..10).collect::<Vec<_>>();
|
let cat_ints = (1..10).collect::<Vec<_>>();
|
||||||
@ -63,7 +60,7 @@ mod test {
|
|||||||
let mut sample_ints = cat_ints.clone();
|
let mut sample_ints = cat_ints.clone();
|
||||||
sample_ints.shuffle(&mut rng);
|
sample_ints.shuffle(&mut rng);
|
||||||
|
|
||||||
let doc = json!({
|
let json = json!({
|
||||||
"id": i,
|
"id": i,
|
||||||
"txt": txt,
|
"txt": txt,
|
||||||
"cat-int": rng.gen_range(0..3),
|
"cat-int": rng.gen_range(0..3),
|
||||||
@ -71,13 +68,16 @@ mod test {
|
|||||||
"cat-ints": sample_ints[..(rng.gen_range(0..3))],
|
"cat-ints": sample_ints[..(rng.gen_range(0..3))],
|
||||||
});
|
});
|
||||||
|
|
||||||
let doc = Cursor::new(serde_json::to_vec(&doc).unwrap());
|
let object = match json {
|
||||||
builder.extend_from_json(doc).unwrap();
|
Value::Object(object) => object,
|
||||||
|
_ => panic!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
builder.append_json_object(&object).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
builder.finish().unwrap();
|
builder.into_inner().unwrap()
|
||||||
cursor.into_inner()
|
});
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns a temporary index populated with random test documents, the FieldId for the
|
/// Returns a temporary index populated with random test documents, the FieldId for the
|
||||||
/// distinct attribute, and the RoaringBitmap with the document ids.
|
/// distinct attribute, and the RoaringBitmap with the document ids.
|
||||||
@ -101,7 +101,8 @@ mod test {
|
|||||||
IndexDocuments::new(&mut txn, &index, &config, indexing_config, |_| ()).unwrap();
|
IndexDocuments::new(&mut txn, &index, &config, indexing_config, |_| ()).unwrap();
|
||||||
|
|
||||||
let reader =
|
let reader =
|
||||||
crate::documents::DocumentBatchReader::from_reader(Cursor::new(&*JSON)).unwrap();
|
crate::documents::DocumentsBatchReader::from_reader(Cursor::new(JSON.as_slice()))
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
addition.add_documents(reader).unwrap();
|
addition.add_documents(reader).unwrap();
|
||||||
addition.execute().unwrap();
|
addition.execute().unwrap();
|
||||||
@ -109,8 +110,8 @@ mod test {
|
|||||||
let fields_map = index.fields_ids_map(&txn).unwrap();
|
let fields_map = index.fields_ids_map(&txn).unwrap();
|
||||||
let fid = fields_map.id(&distinct).unwrap();
|
let fid = fields_map.id(&distinct).unwrap();
|
||||||
|
|
||||||
let documents = DocumentBatchReader::from_reader(Cursor::new(&*JSON)).unwrap();
|
let documents = DocumentsBatchReader::from_reader(Cursor::new(JSON.as_slice())).unwrap();
|
||||||
let map = (0..documents.len() as u32).collect();
|
let map = (0..documents.documents_count() as u32).collect();
|
||||||
|
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
|
|
||||||
|
@ -25,7 +25,7 @@ pub use self::helpers::{
|
|||||||
};
|
};
|
||||||
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
|
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
|
||||||
pub use self::transform::{Transform, TransformOutput};
|
pub use self::transform::{Transform, TransformOutput};
|
||||||
use crate::documents::DocumentBatchReader;
|
use crate::documents::DocumentsBatchReader;
|
||||||
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
||||||
use crate::update::{
|
use crate::update::{
|
||||||
self, Facets, IndexerConfig, UpdateIndexingStep, WordPrefixDocids,
|
self, Facets, IndexerConfig, UpdateIndexingStep, WordPrefixDocids,
|
||||||
@ -121,7 +121,7 @@ where
|
|||||||
/// builder, and the builder must be discarded.
|
/// builder, and the builder must be discarded.
|
||||||
///
|
///
|
||||||
/// Returns the number of documents added to the builder.
|
/// Returns the number of documents added to the builder.
|
||||||
pub fn add_documents<R>(&mut self, reader: DocumentBatchReader<R>) -> Result<u64>
|
pub fn add_documents<R>(&mut self, reader: DocumentsBatchReader<R>) -> Result<u64>
|
||||||
where
|
where
|
||||||
R: Read + Seek,
|
R: Read + Seek,
|
||||||
{
|
{
|
||||||
@ -590,9 +590,8 @@ mod tests {
|
|||||||
use maplit::hashset;
|
use maplit::hashset;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::documents::DocumentBatchBuilder;
|
use crate::documents::DocumentsBatchBuilder;
|
||||||
use crate::update::DeleteDocuments;
|
use crate::update::DeleteDocuments;
|
||||||
use crate::HashMap;
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn simple_document_replacement() {
|
fn simple_document_replacement() {
|
||||||
@ -1252,21 +1251,17 @@ mod tests {
|
|||||||
|
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
|
||||||
let mut big_object = HashMap::new();
|
let mut big_object = serde_json::Map::new();
|
||||||
big_object.insert(S("id"), "wow");
|
big_object.insert(S("id"), serde_json::Value::from("wow"));
|
||||||
for i in 0..1000 {
|
for i in 0..1000 {
|
||||||
let key = i.to_string();
|
let key = i.to_string();
|
||||||
big_object.insert(key, "I am a text!");
|
big_object.insert(key, serde_json::Value::from("I am a text!"));
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut cursor = Cursor::new(Vec::new());
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
|
builder.append_json_object(&big_object).unwrap();
|
||||||
let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
let vector = builder.into_inner().unwrap();
|
||||||
let big_object = Cursor::new(serde_json::to_vec(&big_object).unwrap());
|
let content = DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
|
||||||
builder.extend_from_json(big_object).unwrap();
|
|
||||||
builder.finish().unwrap();
|
|
||||||
cursor.set_position(0);
|
|
||||||
let content = DocumentBatchReader::from_reader(cursor).unwrap();
|
|
||||||
|
|
||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
@ -1288,23 +1283,19 @@ mod tests {
|
|||||||
|
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
|
||||||
let mut big_object = HashMap::new();
|
let mut big_object = serde_json::Map::new();
|
||||||
big_object.insert(S("id"), "wow");
|
big_object.insert(S("id"), serde_json::Value::from("wow"));
|
||||||
let content: String = (0..=u16::MAX)
|
let content: String = (0..=u16::MAX)
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|p| p.to_string())
|
.map(|p| p.to_string())
|
||||||
.reduce(|a, b| a + " " + b.as_ref())
|
.reduce(|a, b| a + " " + b.as_ref())
|
||||||
.unwrap();
|
.unwrap();
|
||||||
big_object.insert("content".to_string(), &content);
|
big_object.insert("content".to_string(), serde_json::Value::from(content));
|
||||||
|
|
||||||
let mut cursor = Cursor::new(Vec::new());
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
|
builder.append_json_object(&big_object).unwrap();
|
||||||
let big_object = serde_json::to_string(&big_object).unwrap();
|
let vector = builder.into_inner().unwrap();
|
||||||
let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
let content = DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
|
||||||
builder.extend_from_json(&mut big_object.as_bytes()).unwrap();
|
|
||||||
builder.finish().unwrap();
|
|
||||||
cursor.set_position(0);
|
|
||||||
let content = DocumentBatchReader::from_reader(cursor).unwrap();
|
|
||||||
|
|
||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
@ -1843,18 +1834,20 @@ mod tests {
|
|||||||
|
|
||||||
// Create 200 documents with a long text
|
// Create 200 documents with a long text
|
||||||
let content = {
|
let content = {
|
||||||
let documents: Vec<_> = (0..200i32)
|
let documents_iter = (0..200i32)
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|i| serde_json::json!({ "id": i, "script": script }))
|
.map(|i| serde_json::json!({ "id": i, "script": script }))
|
||||||
.collect();
|
.filter_map(|json| match json {
|
||||||
|
serde_json::Value::Object(object) => Some(object),
|
||||||
|
_ => None,
|
||||||
|
});
|
||||||
|
|
||||||
let mut writer = std::io::Cursor::new(Vec::new());
|
let mut builder = crate::documents::DocumentsBatchBuilder::new(Vec::new());
|
||||||
let mut builder = crate::documents::DocumentBatchBuilder::new(&mut writer).unwrap();
|
for object in documents_iter {
|
||||||
let documents = serde_json::to_vec(&documents).unwrap();
|
builder.append_json_object(&object).unwrap();
|
||||||
builder.extend_from_json(std::io::Cursor::new(documents)).unwrap();
|
}
|
||||||
builder.finish().unwrap();
|
let vector = builder.into_inner().unwrap();
|
||||||
writer.set_position(0);
|
crate::documents::DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap()
|
||||||
crate::documents::DocumentBatchReader::from_reader(writer).unwrap()
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Index those 200 long documents
|
// Index those 200 long documents
|
||||||
|
@ -14,7 +14,7 @@ use smartstring::SmartString;
|
|||||||
|
|
||||||
use super::helpers::{create_sorter, create_writer, keep_latest_obkv, merge_obkvs, MergeFn};
|
use super::helpers::{create_sorter, create_writer, keep_latest_obkv, merge_obkvs, MergeFn};
|
||||||
use super::{IndexDocumentsMethod, IndexerConfig};
|
use super::{IndexDocumentsMethod, IndexerConfig};
|
||||||
use crate::documents::{DocumentBatchReader, DocumentsBatchIndex};
|
use crate::documents::{DocumentsBatchIndex, DocumentsBatchReader};
|
||||||
use crate::error::{Error, InternalError, UserError};
|
use crate::error::{Error, InternalError, UserError};
|
||||||
use crate::index::db_name;
|
use crate::index::db_name;
|
||||||
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
|
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
|
||||||
@ -152,7 +152,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
|
|
||||||
pub fn read_documents<R, F>(
|
pub fn read_documents<R, F>(
|
||||||
&mut self,
|
&mut self,
|
||||||
mut reader: DocumentBatchReader<R>,
|
reader: DocumentsBatchReader<R>,
|
||||||
wtxn: &mut heed::RwTxn,
|
wtxn: &mut heed::RwTxn,
|
||||||
progress_callback: F,
|
progress_callback: F,
|
||||||
) -> Result<usize>
|
) -> Result<usize>
|
||||||
@ -160,7 +160,8 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
R: Read + Seek,
|
R: Read + Seek,
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
{
|
{
|
||||||
let fields_index = reader.index();
|
let mut cursor = reader.into_cursor();
|
||||||
|
let fields_index = cursor.documents_batch_index();
|
||||||
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
|
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
|
||||||
|
|
||||||
let mapping = create_fields_mapping(&mut self.fields_ids_map, fields_index)?;
|
let mapping = create_fields_mapping(&mut self.fields_ids_map, fields_index)?;
|
||||||
@ -186,7 +187,8 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
let mut documents_count = 0;
|
let mut documents_count = 0;
|
||||||
let mut external_id_buffer = Vec::new();
|
let mut external_id_buffer = Vec::new();
|
||||||
let mut field_buffer: Vec<(u16, Cow<[u8]>)> = Vec::new();
|
let mut field_buffer: Vec<(u16, Cow<[u8]>)> = Vec::new();
|
||||||
while let Some((addition_index, document)) = reader.next_document_with_index()? {
|
let addition_index = cursor.documents_batch_index().clone();
|
||||||
|
while let Some(document) = cursor.next_document()? {
|
||||||
let mut field_buffer_cache = drop_and_reuse(field_buffer);
|
let mut field_buffer_cache = drop_and_reuse(field_buffer);
|
||||||
if self.indexer_settings.log_every_n.map_or(false, |len| documents_count % len == 0) {
|
if self.indexer_settings.log_every_n.map_or(false, |len| documents_count % len == 0) {
|
||||||
progress_callback(UpdateIndexingStep::RemapDocumentAddition {
|
progress_callback(UpdateIndexingStep::RemapDocumentAddition {
|
||||||
@ -840,7 +842,7 @@ fn update_primary_key<'a>(
|
|||||||
None => {
|
None => {
|
||||||
let mut json = Map::new();
|
let mut json = Map::new();
|
||||||
for (key, value) in document.iter() {
|
for (key, value) in document.iter() {
|
||||||
let key = addition_index.name(key).cloned();
|
let key = addition_index.name(key).map(ToString::to_string);
|
||||||
let value = serde_json::from_slice::<Value>(&value).ok();
|
let value = serde_json::from_slice::<Value>(&value).ok();
|
||||||
|
|
||||||
if let Some((k, v)) = key.zip(value) {
|
if let Some((k, v)) = key.zip(value) {
|
||||||
|
@ -3,9 +3,10 @@ use std::io::Cursor;
|
|||||||
use big_s::S;
|
use big_s::S;
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use maplit::hashset;
|
use maplit::hashset;
|
||||||
use milli::documents::{DocumentBatchBuilder, DocumentBatchReader};
|
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||||
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||||
use milli::{FacetDistribution, Index};
|
use milli::{FacetDistribution, Index};
|
||||||
|
use serde_json::{Deserializer, Map, Value};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_facet_distribution_with_no_facet_values() {
|
fn test_facet_distribution_with_no_facet_values() {
|
||||||
@ -30,35 +31,30 @@ fn test_facet_distribution_with_no_facet_values() {
|
|||||||
|
|
||||||
let mut builder =
|
let mut builder =
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
|
||||||
let mut cursor = Cursor::new(Vec::new());
|
let mut documents_builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
let mut documents_builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
|
||||||
let reader = Cursor::new(
|
let reader = Cursor::new(
|
||||||
r#"[
|
r#"{
|
||||||
{
|
|
||||||
"id": 123,
|
"id": 123,
|
||||||
"title": "What a week, hu...",
|
"title": "What a week, hu...",
|
||||||
"genres": [],
|
"genres": [],
|
||||||
"tags": ["blue"]
|
"tags": ["blue"]
|
||||||
},
|
}
|
||||||
{
|
{
|
||||||
"id": 345,
|
"id": 345,
|
||||||
"title": "I am the pig!",
|
"title": "I am the pig!",
|
||||||
"tags": ["red"]
|
"tags": ["red"]
|
||||||
}
|
}"#,
|
||||||
]"#,
|
|
||||||
);
|
);
|
||||||
|
|
||||||
for doc in serde_json::Deserializer::from_reader(reader).into_iter::<serde_json::Value>() {
|
for result in Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
|
||||||
let doc = Cursor::new(serde_json::to_vec(&doc.unwrap()).unwrap());
|
let object = result.unwrap();
|
||||||
documents_builder.extend_from_json(doc).unwrap();
|
documents_builder.append_json_object(&object).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
documents_builder.finish().unwrap();
|
let vector = documents_builder.into_inner().unwrap();
|
||||||
|
|
||||||
cursor.set_position(0);
|
|
||||||
|
|
||||||
// index documents
|
// index documents
|
||||||
let content = DocumentBatchReader::from_reader(cursor).unwrap();
|
let content = DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
|
||||||
builder.add_documents(content).unwrap();
|
builder.add_documents(content).unwrap();
|
||||||
builder.execute().unwrap();
|
builder.execute().unwrap();
|
||||||
|
|
||||||
|
@ -6,10 +6,11 @@ use big_s::S;
|
|||||||
use either::{Either, Left, Right};
|
use either::{Either, Left, Right};
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use maplit::{hashmap, hashset};
|
use maplit::{hashmap, hashset};
|
||||||
use milli::documents::{DocumentBatchBuilder, DocumentBatchReader};
|
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||||
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||||
use milli::{AscDesc, Criterion, DocumentId, Index, Member};
|
use milli::{AscDesc, Criterion, DocumentId, Index, Member};
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
use serde_json::{Deserializer, Map, Value};
|
||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
||||||
mod distinct;
|
mod distinct;
|
||||||
@ -62,21 +63,18 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
|||||||
|
|
||||||
let mut builder =
|
let mut builder =
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
|
||||||
let mut cursor = Cursor::new(Vec::new());
|
let mut documents_builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
let mut documents_builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
|
||||||
let reader = Cursor::new(CONTENT.as_bytes());
|
let reader = Cursor::new(CONTENT.as_bytes());
|
||||||
|
|
||||||
for doc in serde_json::Deserializer::from_reader(reader).into_iter::<serde_json::Value>() {
|
for result in Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
|
||||||
let doc = Cursor::new(serde_json::to_vec(&doc.unwrap()).unwrap());
|
let object = result.unwrap();
|
||||||
documents_builder.extend_from_json(doc).unwrap();
|
documents_builder.append_json_object(&object).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
documents_builder.finish().unwrap();
|
let vector = documents_builder.into_inner().unwrap();
|
||||||
|
|
||||||
cursor.set_position(0);
|
|
||||||
|
|
||||||
// index documents
|
// index documents
|
||||||
let content = DocumentBatchReader::from_reader(cursor).unwrap();
|
let content = DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
|
||||||
builder.add_documents(content).unwrap();
|
builder.add_documents(content).unwrap();
|
||||||
builder.execute().unwrap();
|
builder.execute().unwrap();
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@ use big_s::S;
|
|||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use maplit::hashset;
|
use maplit::hashset;
|
||||||
use milli::documents::{DocumentBatchBuilder, DocumentBatchReader};
|
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||||
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||||
use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult};
|
use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult};
|
||||||
use rand::Rng;
|
use rand::Rng;
|
||||||
@ -393,8 +393,7 @@ fn criteria_ascdesc() {
|
|||||||
let mut builder =
|
let mut builder =
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
|
||||||
|
|
||||||
let mut cursor = Cursor::new(Vec::new());
|
let mut batch_builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
let mut batch_builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
|
||||||
|
|
||||||
(0..ASC_DESC_CANDIDATES_THRESHOLD + 1).for_each(|_| {
|
(0..ASC_DESC_CANDIDATES_THRESHOLD + 1).for_each(|_| {
|
||||||
let mut rng = rand::thread_rng();
|
let mut rng = rand::thread_rng();
|
||||||
@ -412,16 +411,17 @@ fn criteria_ascdesc() {
|
|||||||
"age": age,
|
"age": age,
|
||||||
});
|
});
|
||||||
|
|
||||||
let json = Cursor::new(serde_json::to_vec(&json).unwrap());
|
let object = match json {
|
||||||
batch_builder.extend_from_json(json).unwrap();
|
serde_json::Value::Object(object) => object,
|
||||||
|
_ => panic!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
batch_builder.append_json_object(&object).unwrap();
|
||||||
});
|
});
|
||||||
|
|
||||||
batch_builder.finish().unwrap();
|
let vector = batch_builder.into_inner().unwrap();
|
||||||
|
|
||||||
cursor.set_position(0);
|
|
||||||
|
|
||||||
let reader = DocumentBatchReader::from_reader(cursor).unwrap();
|
|
||||||
|
|
||||||
|
let reader = DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
|
||||||
builder.add_documents(reader).unwrap();
|
builder.add_documents(reader).unwrap();
|
||||||
builder.execute().unwrap();
|
builder.execute().unwrap();
|
||||||
|
|
||||||
|
@ -106,26 +106,23 @@ fn test_typo_disabled_on_word() {
|
|||||||
options.map_size(4096 * 100);
|
options.map_size(4096 * 100);
|
||||||
let index = Index::new(options, tmp.path()).unwrap();
|
let index = Index::new(options, tmp.path()).unwrap();
|
||||||
|
|
||||||
let documents = json!([
|
let mut builder = milli::documents::DocumentsBatchBuilder::new(Vec::new());
|
||||||
{
|
let doc1 = json!({
|
||||||
"id": 1usize,
|
"id": 1usize,
|
||||||
"data": "zealand",
|
"data": "zealand",
|
||||||
},
|
});
|
||||||
{
|
|
||||||
|
let doc2 = json!({
|
||||||
"id": 2usize,
|
"id": 2usize,
|
||||||
"data": "zearand",
|
"data": "zearand",
|
||||||
},
|
});
|
||||||
]);
|
|
||||||
|
|
||||||
let mut writer = std::io::Cursor::new(Vec::new());
|
builder.append_json_object(doc1.as_object().unwrap()).unwrap();
|
||||||
let mut builder = milli::documents::DocumentBatchBuilder::new(&mut writer).unwrap();
|
builder.append_json_object(doc2.as_object().unwrap()).unwrap();
|
||||||
let documents = serde_json::to_vec(&documents).unwrap();
|
let vector = builder.into_inner().unwrap();
|
||||||
builder.extend_from_json(std::io::Cursor::new(documents)).unwrap();
|
|
||||||
builder.finish().unwrap();
|
|
||||||
|
|
||||||
writer.set_position(0);
|
let documents =
|
||||||
|
milli::documents::DocumentsBatchReader::from_reader(std::io::Cursor::new(vector)).unwrap();
|
||||||
let documents = milli::documents::DocumentBatchReader::from_reader(writer).unwrap();
|
|
||||||
|
|
||||||
let mut txn = index.write_txn().unwrap();
|
let mut txn = index.write_txn().unwrap();
|
||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
|
Loading…
Reference in New Issue
Block a user