Mirror of https://github.com/meilisearch/MeiliSearch (synced 2025-07-04 20:37:15 +02:00)

Commit a84f3a8b31 (parent c81ff22c5b)

    Apply suggestions from code review

    Co-authored-by: Clément Renault <clement@meilisearch.com>

13 changed files with 77 additions and 69 deletions

@@ -383,15 +383,16 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
         if let Some(mut rtree) = self.index.geo_rtree(self.wtxn)? {
             let mut geo_faceted_doc_ids = self.index.geo_faceted_documents_ids(self.wtxn)?;
 
-            let points_to_remove: Vec<_> = rtree
+            let (points_to_remove, docids_to_remove): (Vec<_>, RoaringBitmap) = rtree
                 .iter()
                 .filter(|&point| self.documents_ids.contains(point.data))
                 .cloned()
-                .collect();
+                .map(|point| (point, point.data))
+                .unzip();
             points_to_remove.iter().for_each(|point| {
                 rtree.remove(&point);
-                geo_faceted_doc_ids.remove(point.data);
             });
+            geo_faceted_doc_ids -= docids_to_remove;
 
             self.index.put_geo_rtree(self.wtxn, &rtree)?;
             self.index.put_geo_faceted_documents_ids(self.wtxn, &geo_faceted_doc_ids)?;
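
Note on the hunk above: the suggested `unzip` works because `Iterator::unzip` can collect into any two collections implementing `Default + Extend`, and `RoaringBitmap` satisfies both, so the docids to remove fall out of the same pass that builds `points_to_remove`. A self-contained sketch of the pattern (assumes the `roaring` crate; names are illustrative, not from the patch):

use roaring::RoaringBitmap;

fn main() {
    // Pairs of (payload, docid); unzip() splits them into two collections in one pass.
    // RoaringBitmap implements Default + Extend<u32>, so it is a valid unzip target.
    let pairs = vec![("a", 1u32), ("b", 2), ("c", 3)];
    let (payloads, docids): (Vec<_>, RoaringBitmap) = pairs.into_iter().unzip();

    assert_eq!(payloads, vec!["a", "b", "c"]);
    assert!(docids.contains(2));

    // The `-=` used in the patch is set difference: drop every id in `docids`.
    let mut all_ids: RoaringBitmap = (0..10u32).collect();
    all_ids -= docids;
    assert!(!all_ids.contains(1) && all_ids.contains(5));
}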

@@ -22,11 +22,10 @@ pub fn extract_geo_points<R: io::Read>(
 
     while let Some((docid_bytes, value)) = obkv_documents.next()? {
         let obkv = obkv::KvReader::new(value);
-        let point = match obkv.get(geo_field_id) {
-            Some(point) => point,
+        let point: Value = match obkv.get(geo_field_id) {
+            Some(point) => serde_json::from_slice(point).map_err(InternalError::SerdeJson)?,
             None => continue,
         };
-        let point: Value = serde_json::from_slice(point).map_err(InternalError::SerdeJson)?;
 
         if let Some((lat, lng)) = point["lat"].as_f64().zip(point["lng"].as_f64()) {
             // this will create an array of 16 bytes (two 8 bytes floats)
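
Note: the suggestion moves deserialization into the `Some` arm, so documents without a `_geo` field skip `serde_json` entirely; a point missing `lat` or `lng` then falls through via `Value`'s `Null` indexing and `Option::zip`. A minimal standalone sketch of that lookup (the helper and its error handling are ours, simplified from the patch):

use serde_json::Value;

fn extract_lat_lng(raw: &[u8]) -> Option<(f64, f64)> {
    // Indexing a Value with a missing key yields Value::Null, so as_f64()
    // returns None and zip() short-circuits to None.
    let point: Value = serde_json::from_slice(raw).ok()?;
    point["lat"].as_f64().zip(point["lng"].as_f64())
}

fn main() {
    assert_eq!(extract_lat_lng(br#"{"lat": 48.85, "lng": 2.35}"#), Some((48.85, 2.35)));
    assert_eq!(extract_lat_lng(br#"{"lat": 48.85}"#), None);
}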

@@ -189,12 +189,9 @@ fn extract_documents_data(
     let documents_chunk_cloned = documents_chunk.clone();
     let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
     rayon::spawn(move || {
-        let _ = match extract_geo_points(
-            documents_chunk_cloned,
-            indexer,
-            primary_key_id,
-            geo_field_id,
-        ) {
+        let result =
+            extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, geo_field_id);
+        let _ = match result {
             Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
             Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
         };
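
Note: this refactor only binds the `Result` to a local before matching on it, keeping the `extract_geo_points` call on one line; the surrounding spawn-and-forward pattern is unchanged. A dependency-free sketch of that pattern (using `std::thread` and `std::sync::mpsc` in place of `rayon::spawn` and the channel type the crate actually uses):

use std::sync::mpsc;

fn main() {
    // Compute on a worker thread, then forward Ok/Err over a channel.
    // `let _ =` deliberately ignores a send() failure: the receiver may
    // already be gone during shutdown.
    let (sender, receiver) = mpsc::channel::<Result<u32, String>>();

    let sender_cloned = sender.clone();
    std::thread::spawn(move || {
        let result: Result<u32, String> = Ok(42);
        let _ = match result {
            Ok(v) => sender_cloned.send(Ok(v)),
            Err(e) => sender_cloned.send(Err(e)),
        };
    });
    drop(sender);

    assert_eq!(receiver.recv().unwrap(), Ok(42));
}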

@@ -237,12 +237,17 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
         // get filterable fields for facet databases
         let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
         // get the fid of the `_geo` field.
-        let geo_field_id = if let Some(gfid) = self.index.fields_ids_map(self.wtxn)?.id("_geo") {
-            (self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid)
-                || self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid))
-            .then(|| gfid)
-        } else {
-            None
+        let geo_field_id = match self.index.fields_ids_map(self.wtxn)?.id("_geo") {
+            Some(gfid) => {
+                let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid);
+                let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid);
+                if is_sortable || is_filterable {
+                    Some(gfid)
+                } else {
+                    None
+                }
+            }
+            None => None,
         };
 
         let stop_words = self.index.stop_words(self.wtxn)?;
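
Note: the removed expression and the suggested `match` are behaviorally identical; the review trades `bool::then` for named booleans and an explicit branch purely for readability. A tiny check of the equivalence (values are illustrative):

fn main() {
    let gfid: u8 = 7;
    let is_sortable = true;
    let is_filterable = false;

    // bool::then evaluates the closure only when the bool is true...
    let concise = (is_sortable || is_filterable).then(|| gfid);
    // ...which is exactly what the explicit form spells out.
    let explicit = if is_sortable || is_filterable { Some(gfid) } else { None };

    assert_eq!(concise, explicit);
}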

@@ -1,4 +1,5 @@
 use std::borrow::Cow;
+use std::convert::TryInto;
 use std::fs::File;
 
 use heed::types::ByteSlice;

@@ -11,7 +12,7 @@ use super::helpers::{
 };
 use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
 use crate::update::index_documents::helpers::into_clonable_grenad;
-use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, GeoPoint, Index, Result};
+use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result};
 
 pub(crate) enum TypedChunk {
     DocidWordPositions(grenad::Reader<CursorClonableMmap>),

@@ -180,24 +181,22 @@ pub(crate) fn write_typed_chunk_into_index(
             is_merged_database = true;
         }
         TypedChunk::GeoPoints(mut geo_points) => {
            // TODO: we should create the rtree with the `RTree::bulk_load` function
             let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
-            let mut doc_ids = index.geo_faceted_documents_ids(wtxn)?;
+            let mut geo_faceted_docids = index.geo_faceted_documents_ids(wtxn)?;
 
             while let Some((key, value)) = geo_points.next()? {
-                // convert the key back to a u32 (4 bytes)
-                let (key, _) = helpers::try_split_array_at::<u8, 4>(key).unwrap();
-                let key = u32::from_be_bytes(key);
+                let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();
 
                 // convert the latitude and longitude back to a f64 (8 bytes)
                 let (lat, tail) = helpers::try_split_array_at::<u8, 8>(value).unwrap();
                 let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap();
                 let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)];
-                rtree.insert(GeoPoint::new(point, key));
-                doc_ids.insert(key);
+                rtree.insert(GeoPoint::new(point, docid));
+                geo_faceted_docids.insert(docid);
             }
             index.put_geo_rtree(wtxn, &rtree)?;
-            index.put_geo_faceted_documents_ids(wtxn, &doc_ids)?;
+            index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
         }
     }
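
Note: with `TryInto` imported (the `@@ -1,4 +1,5 @@` hunk above), the suggestion decodes the 4-byte big-endian grenad key straight into a `DocumentId` instead of going through `try_split_array_at`. A minimal sketch of that conversion (assuming `DocumentId` is milli's `u32` alias):

use std::convert::TryInto;

type DocumentId = u32; // mirrors milli's alias

fn main() {
    // The grenad key holds the docid as 4 big-endian bytes.
    // &[u8] -> [u8; 4] via TryInto fails cleanly if the slice is not 4 bytes.
    let key: &[u8] = &300u32.to_be_bytes();
    let docid: DocumentId = key.try_into().map(DocumentId::from_be_bytes).unwrap();
    assert_eq!(docid, 300);
}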