mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
create a new database containing all the documents that were geo-faceted
This commit is contained in:
parent
4b459768a0
commit
ea2f2ecf96
4 changed files with 70 additions and 3 deletions
|
@ -49,6 +49,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||
self.index.put_documents_ids(self.wtxn, &RoaringBitmap::default())?;
|
||||
self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
|
||||
self.index.delete_geo_rtree(self.wtxn)?;
|
||||
self.index.delete_geo_faceted_documents_ids(self.wtxn)?;
|
||||
|
||||
// We clear all the faceted document ids.
|
||||
let empty = RoaringBitmap::default();
|
||||
|
@ -116,6 +117,7 @@ mod tests {
|
|||
assert!(index.documents_ids(&rtxn).unwrap().is_empty());
|
||||
assert!(index.field_distribution(&rtxn).unwrap().is_empty());
|
||||
assert!(index.geo_rtree(&rtxn).unwrap().is_none());
|
||||
assert!(index.geo_faceted_documents_ids(&rtxn).unwrap().is_empty());
|
||||
|
||||
assert!(index.word_docids.is_empty(&rtxn).unwrap());
|
||||
assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap());
|
||||
|
|
|
@ -381,6 +381,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||
drop(iter);
|
||||
|
||||
if let Some(mut rtree) = self.index.geo_rtree(self.wtxn)? {
|
||||
let mut geo_faceted_doc_ids = self.index.geo_faceted_documents_ids(self.wtxn)?;
|
||||
|
||||
let points_to_remove: Vec<_> = rtree
|
||||
.iter()
|
||||
.filter(|&point| self.documents_ids.contains(point.data))
|
||||
|
@ -388,9 +390,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||
.collect();
|
||||
points_to_remove.iter().for_each(|point| {
|
||||
rtree.remove(&point);
|
||||
geo_faceted_doc_ids.remove(point.data);
|
||||
});
|
||||
|
||||
self.index.put_geo_rtree(self.wtxn, &rtree)?;
|
||||
self.index.put_geo_faceted_documents_ids(self.wtxn, &geo_faceted_doc_ids)?;
|
||||
}
|
||||
|
||||
// We delete the document ids that are stored under the facet field id values.
|
||||
|
@ -555,6 +559,8 @@ where
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashSet;
|
||||
|
||||
use big_s::S;
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::hashset;
|
||||
|
@ -726,11 +732,30 @@ mod tests {
|
|||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let rtree = index.geo_rtree(&rtxn).unwrap().unwrap();
|
||||
let geo_faceted_doc_ids = index.geo_faceted_documents_ids(&rtxn).unwrap();
|
||||
|
||||
let all_geo_ids = rtree.iter().map(|point| point.data).collect::<Vec<_>>();
|
||||
let all_geo_documents = index.documents(&rtxn, all_geo_ids.iter().copied()).unwrap();
|
||||
let all_geo_documents = index
|
||||
.documents(&rtxn, all_geo_ids.iter().copied())
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|(id, _)| *id)
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
for (id, _) in all_geo_documents.iter() {
|
||||
let all_geo_faceted_ids = geo_faceted_doc_ids.iter().collect::<Vec<_>>();
|
||||
let all_geo_faceted_documents = index
|
||||
.documents(&rtxn, all_geo_faceted_ids.iter().copied())
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|(id, _)| *id)
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
assert_eq!(
|
||||
all_geo_documents, all_geo_faceted_documents,
|
||||
"There is an inconsistency between the geo_faceted database and the rtree"
|
||||
);
|
||||
|
||||
for id in all_geo_documents.iter() {
|
||||
assert!(!ids_to_delete.contains(&id), "The document {} was supposed to be deleted", id);
|
||||
}
|
||||
|
||||
|
|
|
@ -182,6 +182,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||
TypedChunk::GeoPoints(mut geo_points) => {
|
||||
// TODO: TAMO: we should create the rtree with the `RTree::bulk_load` function
|
||||
let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
|
||||
let mut doc_ids = index.geo_faceted_documents_ids(wtxn)?;
|
||||
|
||||
while let Some((key, value)) = geo_points.next()? {
|
||||
// convert the key back to a u32 (4 bytes)
|
||||
let (key, _) = helpers::try_split_array_at::<u8, 4>(key).unwrap();
|
||||
|
@ -192,8 +194,10 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||
let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap();
|
||||
let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)];
|
||||
rtree.insert(GeoPoint::new(point, key));
|
||||
doc_ids.insert(key);
|
||||
}
|
||||
index.put_geo_rtree(wtxn, &rtree)?;
|
||||
index.put_geo_faceted_documents_ids(wtxn, &doc_ids)?;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue