mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-25 20:57:35 +01:00
create a new database containing all the documents that were geo-faceted
This commit is contained in:
parent
4b459768a0
commit
ea2f2ecf96
@ -32,7 +32,8 @@ pub mod main_key {
|
|||||||
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
|
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
|
||||||
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
|
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
|
||||||
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
||||||
pub const GEO_RTREE_KEY: &str = "geo";
|
pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
|
||||||
|
pub const GEO_RTREE_KEY: &str = "geo-rtree";
|
||||||
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
|
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
|
||||||
pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
|
pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
|
||||||
pub const PRIMARY_KEY_KEY: &str = "primary-key";
|
pub const PRIMARY_KEY_KEY: &str = "primary-key";
|
||||||
@ -320,6 +321,41 @@ impl Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* geo faceted */
|
||||||
|
|
||||||
|
/// Writes the documents ids that are faceted with a _geo field
|
||||||
|
pub(crate) fn put_geo_faceted_documents_ids(
|
||||||
|
&self,
|
||||||
|
wtxn: &mut RwTxn,
|
||||||
|
docids: &RoaringBitmap,
|
||||||
|
) -> heed::Result<()> {
|
||||||
|
self.main.put::<_, Str, RoaringBitmapCodec>(
|
||||||
|
wtxn,
|
||||||
|
main_key::GEO_FACETED_DOCUMENTS_IDS_KEY,
|
||||||
|
docids,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Delete the documents ids that are faceted with a _geo field
|
||||||
|
pub(crate) fn delete_geo_faceted_documents_ids(&self, wtxn: &mut RwTxn) -> heed::Result<()> {
|
||||||
|
self.main.put::<_, Str, RoaringBitmapCodec>(
|
||||||
|
wtxn,
|
||||||
|
main_key::GEO_FACETED_DOCUMENTS_IDS_KEY,
|
||||||
|
&RoaringBitmap::new(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Retrieve all the documents ids that faceted with a _geo field
|
||||||
|
pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
|
||||||
|
match self
|
||||||
|
.main
|
||||||
|
.get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)?
|
||||||
|
{
|
||||||
|
Some(docids) => Ok(docids),
|
||||||
|
None => Ok(RoaringBitmap::new()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* field distribution */
|
/* field distribution */
|
||||||
|
|
||||||
/// Writes the field distribution which associates every field name with
|
/// Writes the field distribution which associates every field name with
|
||||||
|
@ -49,6 +49,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||||||
self.index.put_documents_ids(self.wtxn, &RoaringBitmap::default())?;
|
self.index.put_documents_ids(self.wtxn, &RoaringBitmap::default())?;
|
||||||
self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
|
self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
|
||||||
self.index.delete_geo_rtree(self.wtxn)?;
|
self.index.delete_geo_rtree(self.wtxn)?;
|
||||||
|
self.index.delete_geo_faceted_documents_ids(self.wtxn)?;
|
||||||
|
|
||||||
// We clean all the faceted documents ids.
|
// We clean all the faceted documents ids.
|
||||||
let empty = RoaringBitmap::default();
|
let empty = RoaringBitmap::default();
|
||||||
@ -116,6 +117,7 @@ mod tests {
|
|||||||
assert!(index.documents_ids(&rtxn).unwrap().is_empty());
|
assert!(index.documents_ids(&rtxn).unwrap().is_empty());
|
||||||
assert!(index.field_distribution(&rtxn).unwrap().is_empty());
|
assert!(index.field_distribution(&rtxn).unwrap().is_empty());
|
||||||
assert!(index.geo_rtree(&rtxn).unwrap().is_none());
|
assert!(index.geo_rtree(&rtxn).unwrap().is_none());
|
||||||
|
assert!(index.geo_faceted_documents_ids(&rtxn).unwrap().is_empty());
|
||||||
|
|
||||||
assert!(index.word_docids.is_empty(&rtxn).unwrap());
|
assert!(index.word_docids.is_empty(&rtxn).unwrap());
|
||||||
assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap());
|
assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap());
|
||||||
|
@ -381,6 +381,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
drop(iter);
|
drop(iter);
|
||||||
|
|
||||||
if let Some(mut rtree) = self.index.geo_rtree(self.wtxn)? {
|
if let Some(mut rtree) = self.index.geo_rtree(self.wtxn)? {
|
||||||
|
let mut geo_faceted_doc_ids = self.index.geo_faceted_documents_ids(self.wtxn)?;
|
||||||
|
|
||||||
let points_to_remove: Vec<_> = rtree
|
let points_to_remove: Vec<_> = rtree
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|&point| self.documents_ids.contains(point.data))
|
.filter(|&point| self.documents_ids.contains(point.data))
|
||||||
@ -388,9 +390,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
.collect();
|
.collect();
|
||||||
points_to_remove.iter().for_each(|point| {
|
points_to_remove.iter().for_each(|point| {
|
||||||
rtree.remove(&point);
|
rtree.remove(&point);
|
||||||
|
geo_faceted_doc_ids.remove(point.data);
|
||||||
});
|
});
|
||||||
|
|
||||||
self.index.put_geo_rtree(self.wtxn, &rtree)?;
|
self.index.put_geo_rtree(self.wtxn, &rtree)?;
|
||||||
|
self.index.put_geo_faceted_documents_ids(self.wtxn, &geo_faceted_doc_ids)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We delete the documents ids that are under the facet field id values.
|
// We delete the documents ids that are under the facet field id values.
|
||||||
@ -555,6 +559,8 @@ where
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use maplit::hashset;
|
use maplit::hashset;
|
||||||
@ -726,11 +732,30 @@ mod tests {
|
|||||||
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
let rtree = index.geo_rtree(&rtxn).unwrap().unwrap();
|
let rtree = index.geo_rtree(&rtxn).unwrap().unwrap();
|
||||||
|
let geo_faceted_doc_ids = index.geo_faceted_documents_ids(&rtxn).unwrap();
|
||||||
|
|
||||||
let all_geo_ids = rtree.iter().map(|point| point.data).collect::<Vec<_>>();
|
let all_geo_ids = rtree.iter().map(|point| point.data).collect::<Vec<_>>();
|
||||||
let all_geo_documents = index.documents(&rtxn, all_geo_ids.iter().copied()).unwrap();
|
let all_geo_documents = index
|
||||||
|
.documents(&rtxn, all_geo_ids.iter().copied())
|
||||||
|
.unwrap()
|
||||||
|
.iter()
|
||||||
|
.map(|(id, _)| *id)
|
||||||
|
.collect::<HashSet<_>>();
|
||||||
|
|
||||||
for (id, _) in all_geo_documents.iter() {
|
let all_geo_faceted_ids = geo_faceted_doc_ids.iter().collect::<Vec<_>>();
|
||||||
|
let all_geo_faceted_documents = index
|
||||||
|
.documents(&rtxn, all_geo_faceted_ids.iter().copied())
|
||||||
|
.unwrap()
|
||||||
|
.iter()
|
||||||
|
.map(|(id, _)| *id)
|
||||||
|
.collect::<HashSet<_>>();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
all_geo_documents, all_geo_faceted_documents,
|
||||||
|
"There is an inconsistency between the geo_faceted database and the rtree"
|
||||||
|
);
|
||||||
|
|
||||||
|
for id in all_geo_documents.iter() {
|
||||||
assert!(!ids_to_delete.contains(&id), "The document {} was supposed to be deleted", id);
|
assert!(!ids_to_delete.contains(&id), "The document {} was supposed to be deleted", id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -182,6 +182,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
TypedChunk::GeoPoints(mut geo_points) => {
|
TypedChunk::GeoPoints(mut geo_points) => {
|
||||||
// TODO: TAMO: we should create the rtree with the `RTree::bulk_load` function
|
// TODO: TAMO: we should create the rtree with the `RTree::bulk_load` function
|
||||||
let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
|
let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
|
||||||
|
let mut doc_ids = index.geo_faceted_documents_ids(wtxn)?;
|
||||||
|
|
||||||
while let Some((key, value)) = geo_points.next()? {
|
while let Some((key, value)) = geo_points.next()? {
|
||||||
// convert the key back to a u32 (4 bytes)
|
// convert the key back to a u32 (4 bytes)
|
||||||
let (key, _) = helpers::try_split_array_at::<u8, 4>(key).unwrap();
|
let (key, _) = helpers::try_split_array_at::<u8, 4>(key).unwrap();
|
||||||
@ -192,8 +194,10 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap();
|
let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap();
|
||||||
let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)];
|
let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)];
|
||||||
rtree.insert(GeoPoint::new(point, key));
|
rtree.insert(GeoPoint::new(point, key));
|
||||||
|
doc_ids.insert(key);
|
||||||
}
|
}
|
||||||
index.put_geo_rtree(wtxn, &rtree)?;
|
index.put_geo_rtree(wtxn, &rtree)?;
|
||||||
|
index.put_geo_faceted_documents_ids(wtxn, &doc_ids)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user