diff --git a/milli/src/update/index_documents/extract/extract_geo_points.rs b/milli/src/update/index_documents/extract/extract_geo_points.rs index 0a73e5ed4..1849d5f5d 100644 --- a/milli/src/update/index_documents/extract/extract_geo_points.rs +++ b/milli/src/update/index_documents/extract/extract_geo_points.rs @@ -21,19 +21,22 @@ pub fn extract_geo_points( })?; // we never encountered any documents with a `_geo` field. We can skip entirely this step - if geo_field_id.is_none() { - return Ok(writer_into_reader(writer)?); - } - let geo_field_id = geo_field_id.unwrap(); + let geo_field_id = match geo_field_id { + Some(geo) => geo, + None => return Ok(writer_into_reader(writer)?), + }; while let Some((docid_bytes, value)) = obkv_documents.next()? { let obkv = obkv::KvReader::new(value); - let point = obkv.get(geo_field_id).unwrap(); // TODO: TAMO where should we handle this error? + let point = match obkv.get(geo_field_id) { + Some(point) => point, + None => continue, + }; let point: Value = serde_json::from_slice(point).map_err(InternalError::SerdeJson)?; if let Some((lat, lng)) = point["lat"].as_f64().zip(point["lng"].as_f64()) { // this will create an array of 16 bytes (two 8 bytes floats) - let bytes: [u8; 16] = concat_arrays![lat.to_le_bytes(), lng.to_le_bytes()]; + let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()]; writer.insert(docid_bytes, bytes)?; } else { // TAMO: improve the warn diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 90a279815..736060b15 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -51,13 +51,14 @@ pub(crate) fn data_from_obkv_documents( lmdb_writer_sx.clone(), &searchable_fields, &faceted_fields, + geo_field_id, &stop_words, ) }) .collect(); let ( - (docid_word_positions_chunks), + docid_word_positions_chunks, (docid_fid_facet_numbers_chunks, docid_fid_facet_strings_chunks), ) = result?; @@ -121,16 +122,6 @@ pub(crate) fn data_from_obkv_documents( "field-id-facet-number-docids", ); - spawn_extraction_task( - documents_chunk, - indexer.clone(), - lmdb_writer_sx.clone(), - move |documents, indexer| extract_geo_points(documents, indexer, geo_field_id), - merge_cbo_roaring_bitmaps, - TypedChunk::GeoPoints, - "geo-points", - ); - Ok(()) } @@ -181,6 +172,7 @@ fn extract_documents_data( lmdb_writer_sx: Sender>, searchable_fields: &Option>, faceted_fields: &HashSet, + geo_field_id: Option, stop_words: &Option>, ) -> Result<( grenad::Reader, @@ -190,6 +182,12 @@ fn extract_documents_data( let _ = lmdb_writer_sx.send(Ok(TypedChunk::Documents(documents_chunk.clone()))); + let (documents_chunk_cloned, lmdb_writer_sx_cloned) = (documents_chunk.clone(), lmdb_writer_sx.clone()); + rayon::spawn(move || { + let geo_points = extract_geo_points(documents_chunk_cloned, indexer, geo_field_id).unwrap(); + lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))).unwrap(); + }); + let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) = rayon::join( || { diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 0dfeabece..9605fea7d 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -181,16 +181,16 @@ pub(crate) fn write_typed_chunk_into_index( } TypedChunk::GeoPoints(mut geo_points) => { // TODO: TAMO: we should create the rtree with the `RTree::bulk_load` function - let mut rtree = index.geo_rtree(&index.read_txn()?)?.unwrap_or_default(); + let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default(); while let Some((key, value)) = geo_points.next()? { // convert the key back to a u32 (4 bytes) let (key, _) = helpers::try_split_array_at::(key).unwrap(); - let key = u32::from_le_bytes(key); + let key = u32::from_be_bytes(key); // convert the latitude and longitude back to a f64 (8 bytes) let (lat, tail) = helpers::try_split_array_at::(value).unwrap(); let (lng, _) = helpers::try_split_array_at::(tail).unwrap(); - let point = [f64::from_le_bytes(lat), f64::from_le_bytes(lng)]; + let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)]; rtree.insert(GeoPoint::new(point, key)); } index.put_geo_rtree(wtxn, &rtree)?;