MeiliSearch/milli/src/update/index_documents/extract/extract_geo_points.rs

48 lines
1.8 KiB
Rust
Raw Normal View History

2021-08-23 18:41:48 +02:00
use std::fs::File;
use std::io;
use concat_arrays::concat_arrays;
use serde_json::Value;
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::{FieldId, InternalError, Result, UserError};
2021-08-23 18:41:48 +02:00
/// Extracts the geographical coordinates contained in each document under the `_geo` field.
///
/// Returns the generated grenad reader containing the docid as key associated to the (latitude, longitude)
2022-02-16 15:28:48 +01:00
pub fn extract_geo_points<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
2021-08-23 18:41:48 +02:00
indexer: GrenadParameters,
primary_key_id: FieldId,
2021-08-30 15:47:11 +02:00
geo_field_id: FieldId,
2021-08-23 18:41:48 +02:00
) -> Result<grenad::Reader<File>> {
2022-02-16 15:28:48 +01:00
let mut writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
2021-08-23 18:41:48 +02:00
2022-02-16 15:28:48 +01:00
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
2021-08-23 18:41:48 +02:00
let obkv = obkv::KvReader::new(value);
let point: Value = match obkv.get(geo_field_id) {
Some(point) => serde_json::from_slice(point).map_err(InternalError::SerdeJson)?,
2021-08-25 16:59:38 +02:00
None => continue,
};
2021-08-23 18:41:48 +02:00
if let Some((lat, lng)) = point["lat"].as_f64().zip(point["lng"].as_f64()) {
2021-08-23 18:41:48 +02:00
// this will create an array of 16 bytes (two 8 bytes floats)
2021-08-25 16:59:38 +02:00
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
2021-08-23 18:41:48 +02:00
writer.insert(docid_bytes, bytes)?;
} else {
2021-09-08 18:12:10 +02:00
// All document must have a primary key so we can unwrap safely here
let primary_key = obkv.get(primary_key_id).unwrap();
let primary_key =
serde_json::from_slice(primary_key).map_err(InternalError::SerdeJson)?;
Err(UserError::InvalidGeoField { document_id: primary_key, object: point })?
2021-08-23 18:41:48 +02:00
}
}
Ok(writer_into_reader(writer)?)
}