mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 14:04:31 +01:00
create a new database containing all the documents that were geo-faceted
This commit is contained in:
parent
4b459768a0
commit
ea2f2ecf96
@ -32,7 +32,8 @@ pub mod main_key {
|
||||
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
|
||||
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
|
||||
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
||||
pub const GEO_RTREE_KEY: &str = "geo";
|
||||
pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
|
||||
pub const GEO_RTREE_KEY: &str = "geo-rtree";
|
||||
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
|
||||
pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
|
||||
pub const PRIMARY_KEY_KEY: &str = "primary-key";
|
||||
@ -320,6 +321,41 @@ impl Index {
|
||||
}
|
||||
}
|
||||
|
||||
/* geo faceted */
|
||||
|
||||
/// Writes the documents ids that are faceted with a _geo field
|
||||
pub(crate) fn put_geo_faceted_documents_ids(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
docids: &RoaringBitmap,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, RoaringBitmapCodec>(
|
||||
wtxn,
|
||||
main_key::GEO_FACETED_DOCUMENTS_IDS_KEY,
|
||||
docids,
|
||||
)
|
||||
}
|
||||
|
||||
/// Delete the documents ids that are faceted with a _geo field
|
||||
pub(crate) fn delete_geo_faceted_documents_ids(&self, wtxn: &mut RwTxn) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, RoaringBitmapCodec>(
|
||||
wtxn,
|
||||
main_key::GEO_FACETED_DOCUMENTS_IDS_KEY,
|
||||
&RoaringBitmap::new(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Retrieve all the documents ids that faceted with a _geo field
|
||||
pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
|
||||
match self
|
||||
.main
|
||||
.get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)?
|
||||
{
|
||||
Some(docids) => Ok(docids),
|
||||
None => Ok(RoaringBitmap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/* field distribution */
|
||||
|
||||
/// Writes the field distribution which associates every field name with
|
||||
|
@ -49,6 +49,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
||||
self.index.put_documents_ids(self.wtxn, &RoaringBitmap::default())?;
|
||||
self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
|
||||
self.index.delete_geo_rtree(self.wtxn)?;
|
||||
self.index.delete_geo_faceted_documents_ids(self.wtxn)?;
|
||||
|
||||
// We clean all the faceted documents ids.
|
||||
let empty = RoaringBitmap::default();
|
||||
@ -116,6 +117,7 @@ mod tests {
|
||||
assert!(index.documents_ids(&rtxn).unwrap().is_empty());
|
||||
assert!(index.field_distribution(&rtxn).unwrap().is_empty());
|
||||
assert!(index.geo_rtree(&rtxn).unwrap().is_none());
|
||||
assert!(index.geo_faceted_documents_ids(&rtxn).unwrap().is_empty());
|
||||
|
||||
assert!(index.word_docids.is_empty(&rtxn).unwrap());
|
||||
assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap());
|
||||
|
@ -381,6 +381,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
drop(iter);
|
||||
|
||||
if let Some(mut rtree) = self.index.geo_rtree(self.wtxn)? {
|
||||
let mut geo_faceted_doc_ids = self.index.geo_faceted_documents_ids(self.wtxn)?;
|
||||
|
||||
let points_to_remove: Vec<_> = rtree
|
||||
.iter()
|
||||
.filter(|&point| self.documents_ids.contains(point.data))
|
||||
@ -388,9 +390,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
.collect();
|
||||
points_to_remove.iter().for_each(|point| {
|
||||
rtree.remove(&point);
|
||||
geo_faceted_doc_ids.remove(point.data);
|
||||
});
|
||||
|
||||
self.index.put_geo_rtree(self.wtxn, &rtree)?;
|
||||
self.index.put_geo_faceted_documents_ids(self.wtxn, &geo_faceted_doc_ids)?;
|
||||
}
|
||||
|
||||
// We delete the documents ids that are under the facet field id values.
|
||||
@ -555,6 +559,8 @@ where
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashSet;
|
||||
|
||||
use big_s::S;
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::hashset;
|
||||
@ -726,11 +732,30 @@ mod tests {
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let rtree = index.geo_rtree(&rtxn).unwrap().unwrap();
|
||||
let geo_faceted_doc_ids = index.geo_faceted_documents_ids(&rtxn).unwrap();
|
||||
|
||||
let all_geo_ids = rtree.iter().map(|point| point.data).collect::<Vec<_>>();
|
||||
let all_geo_documents = index.documents(&rtxn, all_geo_ids.iter().copied()).unwrap();
|
||||
let all_geo_documents = index
|
||||
.documents(&rtxn, all_geo_ids.iter().copied())
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|(id, _)| *id)
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
for (id, _) in all_geo_documents.iter() {
|
||||
let all_geo_faceted_ids = geo_faceted_doc_ids.iter().collect::<Vec<_>>();
|
||||
let all_geo_faceted_documents = index
|
||||
.documents(&rtxn, all_geo_faceted_ids.iter().copied())
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|(id, _)| *id)
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
assert_eq!(
|
||||
all_geo_documents, all_geo_faceted_documents,
|
||||
"There is an inconsistency between the geo_faceted database and the rtree"
|
||||
);
|
||||
|
||||
for id in all_geo_documents.iter() {
|
||||
assert!(!ids_to_delete.contains(&id), "The document {} was supposed to be deleted", id);
|
||||
}
|
||||
|
||||
|
@ -182,6 +182,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
TypedChunk::GeoPoints(mut geo_points) => {
|
||||
// TODO: TAMO: we should create the rtree with the `RTree::bulk_load` function
|
||||
let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
|
||||
let mut doc_ids = index.geo_faceted_documents_ids(wtxn)?;
|
||||
|
||||
while let Some((key, value)) = geo_points.next()? {
|
||||
// convert the key back to a u32 (4 bytes)
|
||||
let (key, _) = helpers::try_split_array_at::<u8, 4>(key).unwrap();
|
||||
@ -192,8 +194,10 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap();
|
||||
let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)];
|
||||
rtree.insert(GeoPoint::new(point, key));
|
||||
doc_ids.insert(key);
|
||||
}
|
||||
index.put_geo_rtree(wtxn, &rtree)?;
|
||||
index.put_geo_faceted_documents_ids(wtxn, &doc_ids)?;
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user