mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Merge #561
561: Enriched documents batch reader r=curquiza a=Kerollmops ~This PR is based on #555 and must be rebased on main after it has been merged to ease the review.~ This PR contains the work in #555 and can be merged on main as soon as reviewed and approved. - [x] Create an `EnrichedDocumentsBatchReader` that contains the external documents id. - [x] Extract the primary key name and make it accessible in the `EnrichedDocumentsBatchReader`. - [x] Use the external id from the `EnrichedDocumentsBatchReader` in the `Transform::read_documents`. - [x] Remove the `update_primary_key` from the _transform.rs_ file. - [x] Really generate the auto-generated documents ids. - [x] Insert the (auto-generated) document ids in the document while processing it in `Transform::read_documents`. Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
commit
941af58239
29 changed files with 1668 additions and 1361 deletions
|
@ -35,7 +35,7 @@ mod test {
|
|||
use roaring::RoaringBitmap;
|
||||
use serde_json::{json, Value};
|
||||
|
||||
use crate::documents::{DocumentBatchBuilder, DocumentBatchReader};
|
||||
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::index::Index;
|
||||
use crate::update::{
|
||||
|
@ -43,14 +43,11 @@ mod test {
|
|||
};
|
||||
use crate::{DocumentId, FieldId, BEU32};
|
||||
|
||||
static JSON: Lazy<Vec<u8>> = Lazy::new(generate_documents);
|
||||
|
||||
fn generate_documents() -> Vec<u8> {
|
||||
static JSON: Lazy<Vec<u8>> = Lazy::new(|| {
|
||||
let mut rng = rand::thread_rng();
|
||||
let num_docs = rng.gen_range(10..30);
|
||||
|
||||
let mut cursor = Cursor::new(Vec::new());
|
||||
let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
||||
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||
let txts = ["Toto", "Titi", "Tata"];
|
||||
let cats = (1..10).map(|i| i.to_string()).collect::<Vec<_>>();
|
||||
let cat_ints = (1..10).collect::<Vec<_>>();
|
||||
|
@ -63,7 +60,7 @@ mod test {
|
|||
let mut sample_ints = cat_ints.clone();
|
||||
sample_ints.shuffle(&mut rng);
|
||||
|
||||
let doc = json!({
|
||||
let json = json!({
|
||||
"id": i,
|
||||
"txt": txt,
|
||||
"cat-int": rng.gen_range(0..3),
|
||||
|
@ -71,13 +68,16 @@ mod test {
|
|||
"cat-ints": sample_ints[..(rng.gen_range(0..3))],
|
||||
});
|
||||
|
||||
let doc = Cursor::new(serde_json::to_vec(&doc).unwrap());
|
||||
builder.extend_from_json(doc).unwrap();
|
||||
let object = match json {
|
||||
Value::Object(object) => object,
|
||||
_ => panic!(),
|
||||
};
|
||||
|
||||
builder.append_json_object(&object).unwrap();
|
||||
}
|
||||
|
||||
builder.finish().unwrap();
|
||||
cursor.into_inner()
|
||||
}
|
||||
builder.into_inner().unwrap()
|
||||
});
|
||||
|
||||
/// Returns a temporary index populated with random test documents, the FieldId for the
|
||||
/// distinct attribute, and the RoaringBitmap with the document ids.
|
||||
|
@ -97,20 +97,22 @@ mod test {
|
|||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
..Default::default()
|
||||
};
|
||||
let mut addition =
|
||||
let addition =
|
||||
IndexDocuments::new(&mut txn, &index, &config, indexing_config, |_| ()).unwrap();
|
||||
|
||||
let reader =
|
||||
crate::documents::DocumentBatchReader::from_reader(Cursor::new(&*JSON)).unwrap();
|
||||
crate::documents::DocumentsBatchReader::from_reader(Cursor::new(JSON.as_slice()))
|
||||
.unwrap();
|
||||
|
||||
addition.add_documents(reader).unwrap();
|
||||
let (addition, user_error) = addition.add_documents(reader).unwrap();
|
||||
user_error.unwrap();
|
||||
addition.execute().unwrap();
|
||||
|
||||
let fields_map = index.fields_ids_map(&txn).unwrap();
|
||||
let fid = fields_map.id(&distinct).unwrap();
|
||||
|
||||
let documents = DocumentBatchReader::from_reader(Cursor::new(&*JSON)).unwrap();
|
||||
let map = (0..documents.len() as u32).collect();
|
||||
let documents = DocumentsBatchReader::from_reader(Cursor::new(JSON.as_slice())).unwrap();
|
||||
let map = (0..documents.documents_count() as u32).collect();
|
||||
|
||||
txn.commit().unwrap();
|
||||
|
||||
|
|
|
@ -648,10 +648,11 @@ mod tests {
|
|||
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
|
||||
.unwrap();
|
||||
builder.add_documents(content).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(content).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue