mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-24 13:40:31 +01:00
remove multiple bugs
This commit is contained in:
parent
b4b6ba6d82
commit
70ab2c37c5
@ -21,19 +21,22 @@ pub fn extract_geo_points<R: io::Read>(
|
|||||||
})?;
|
})?;
|
||||||
|
|
||||||
// we never encountered any documents with a `_geo` field. We can skip entirely this step
|
// we never encountered any documents with a `_geo` field. We can skip entirely this step
|
||||||
if geo_field_id.is_none() {
|
let geo_field_id = match geo_field_id {
|
||||||
return Ok(writer_into_reader(writer)?);
|
Some(geo) => geo,
|
||||||
}
|
None => return Ok(writer_into_reader(writer)?),
|
||||||
let geo_field_id = geo_field_id.unwrap();
|
};
|
||||||
|
|
||||||
while let Some((docid_bytes, value)) = obkv_documents.next()? {
|
while let Some((docid_bytes, value)) = obkv_documents.next()? {
|
||||||
let obkv = obkv::KvReader::new(value);
|
let obkv = obkv::KvReader::new(value);
|
||||||
let point = obkv.get(geo_field_id).unwrap(); // TODO: TAMO where should we handle this error?
|
let point = match obkv.get(geo_field_id) {
|
||||||
|
Some(point) => point,
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
let point: Value = serde_json::from_slice(point).map_err(InternalError::SerdeJson)?;
|
let point: Value = serde_json::from_slice(point).map_err(InternalError::SerdeJson)?;
|
||||||
|
|
||||||
if let Some((lat, lng)) = point["lat"].as_f64().zip(point["lng"].as_f64()) {
|
if let Some((lat, lng)) = point["lat"].as_f64().zip(point["lng"].as_f64()) {
|
||||||
// this will create an array of 16 bytes (two 8 bytes floats)
|
// this will create an array of 16 bytes (two 8 bytes floats)
|
||||||
let bytes: [u8; 16] = concat_arrays![lat.to_le_bytes(), lng.to_le_bytes()];
|
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
|
||||||
writer.insert(docid_bytes, bytes)?;
|
writer.insert(docid_bytes, bytes)?;
|
||||||
} else {
|
} else {
|
||||||
// TAMO: improve the warn
|
// TAMO: improve the warn
|
||||||
|
@ -51,13 +51,14 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
lmdb_writer_sx.clone(),
|
lmdb_writer_sx.clone(),
|
||||||
&searchable_fields,
|
&searchable_fields,
|
||||||
&faceted_fields,
|
&faceted_fields,
|
||||||
|
geo_field_id,
|
||||||
&stop_words,
|
&stop_words,
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let (
|
let (
|
||||||
(docid_word_positions_chunks),
|
docid_word_positions_chunks,
|
||||||
(docid_fid_facet_numbers_chunks, docid_fid_facet_strings_chunks),
|
(docid_fid_facet_numbers_chunks, docid_fid_facet_strings_chunks),
|
||||||
) = result?;
|
) = result?;
|
||||||
|
|
||||||
@ -121,16 +122,6 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
"field-id-facet-number-docids",
|
"field-id-facet-number-docids",
|
||||||
);
|
);
|
||||||
|
|
||||||
spawn_extraction_task(
|
|
||||||
documents_chunk,
|
|
||||||
indexer.clone(),
|
|
||||||
lmdb_writer_sx.clone(),
|
|
||||||
move |documents, indexer| extract_geo_points(documents, indexer, geo_field_id),
|
|
||||||
merge_cbo_roaring_bitmaps,
|
|
||||||
TypedChunk::GeoPoints,
|
|
||||||
"geo-points",
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -181,6 +172,7 @@ fn extract_documents_data(
|
|||||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||||
searchable_fields: &Option<HashSet<FieldId>>,
|
searchable_fields: &Option<HashSet<FieldId>>,
|
||||||
faceted_fields: &HashSet<FieldId>,
|
faceted_fields: &HashSet<FieldId>,
|
||||||
|
geo_field_id: Option<FieldId>,
|
||||||
stop_words: &Option<fst::Set<&[u8]>>,
|
stop_words: &Option<fst::Set<&[u8]>>,
|
||||||
) -> Result<(
|
) -> Result<(
|
||||||
grenad::Reader<CursorClonableMmap>,
|
grenad::Reader<CursorClonableMmap>,
|
||||||
@ -190,6 +182,12 @@ fn extract_documents_data(
|
|||||||
|
|
||||||
let _ = lmdb_writer_sx.send(Ok(TypedChunk::Documents(documents_chunk.clone())));
|
let _ = lmdb_writer_sx.send(Ok(TypedChunk::Documents(documents_chunk.clone())));
|
||||||
|
|
||||||
|
let (documents_chunk_cloned, lmdb_writer_sx_cloned) = (documents_chunk.clone(), lmdb_writer_sx.clone());
|
||||||
|
rayon::spawn(move || {
|
||||||
|
let geo_points = extract_geo_points(documents_chunk_cloned, indexer, geo_field_id).unwrap();
|
||||||
|
lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))).unwrap();
|
||||||
|
});
|
||||||
|
|
||||||
let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
|
let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
|
||||||
rayon::join(
|
rayon::join(
|
||||||
|| {
|
|| {
|
||||||
|
@ -181,16 +181,16 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
}
|
}
|
||||||
TypedChunk::GeoPoints(mut geo_points) => {
|
TypedChunk::GeoPoints(mut geo_points) => {
|
||||||
// TODO: TAMO: we should create the rtree with the `RTree::bulk_load` function
|
// TODO: TAMO: we should create the rtree with the `RTree::bulk_load` function
|
||||||
let mut rtree = index.geo_rtree(&index.read_txn()?)?.unwrap_or_default();
|
let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
|
||||||
while let Some((key, value)) = geo_points.next()? {
|
while let Some((key, value)) = geo_points.next()? {
|
||||||
// convert the key back to a u32 (4 bytes)
|
// convert the key back to a u32 (4 bytes)
|
||||||
let (key, _) = helpers::try_split_array_at::<u8, 4>(key).unwrap();
|
let (key, _) = helpers::try_split_array_at::<u8, 4>(key).unwrap();
|
||||||
let key = u32::from_le_bytes(key);
|
let key = u32::from_be_bytes(key);
|
||||||
|
|
||||||
// convert the latitude and longitude back to a f64 (8 bytes)
|
// convert the latitude and longitude back to a f64 (8 bytes)
|
||||||
let (lat, tail) = helpers::try_split_array_at::<u8, 8>(value).unwrap();
|
let (lat, tail) = helpers::try_split_array_at::<u8, 8>(value).unwrap();
|
||||||
let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap();
|
let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap();
|
||||||
let point = [f64::from_le_bytes(lat), f64::from_le_bytes(lng)];
|
let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)];
|
||||||
rtree.insert(GeoPoint::new(point, key));
|
rtree.insert(GeoPoint::new(point, key));
|
||||||
}
|
}
|
||||||
index.put_geo_rtree(wtxn, &rtree)?;
|
index.put_geo_rtree(wtxn, &rtree)?;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user