mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 05:14:27 +01:00
Implement an ugly deletion of values in the HNSW
This commit is contained in:
parent
436a10bef4
commit
c2a402f3ae
@ -4,8 +4,10 @@ use std::collections::{BTreeSet, HashMap, HashSet};
|
|||||||
use fst::IntoStreamer;
|
use fst::IntoStreamer;
|
||||||
use heed::types::{ByteSlice, DecodeIgnore, Str, UnalignedSlice};
|
use heed::types::{ByteSlice, DecodeIgnore, Str, UnalignedSlice};
|
||||||
use heed::{BytesDecode, BytesEncode, Database, RwIter};
|
use heed::{BytesDecode, BytesEncode, Database, RwIter};
|
||||||
|
use hnsw::Searcher;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use space::KnnPoints;
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
use super::facet::delete::FacetsDelete;
|
use super::facet::delete::FacetsDelete;
|
||||||
@ -14,6 +16,7 @@ use crate::error::InternalError;
|
|||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::FieldDocIdFacetCodec;
|
use crate::heed_codec::facet::FieldDocIdFacetCodec;
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
|
use crate::index::Hnsw;
|
||||||
use crate::{
|
use crate::{
|
||||||
ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result, RoaringBitmapCodec, BEU32,
|
ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result, RoaringBitmapCodec, BEU32,
|
||||||
};
|
};
|
||||||
@ -430,6 +433,30 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
&self.to_delete_docids,
|
&self.to_delete_docids,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
// An ugly and slow way to remove the vectors from the HNSW
|
||||||
|
// It basically reconstructs the HNSW from scratch without editing the current one.
|
||||||
|
let current_hnsw = self.index.vector_hnsw(self.wtxn)?.unwrap_or_default();
|
||||||
|
if !current_hnsw.is_empty() {
|
||||||
|
let mut new_hnsw = Hnsw::default();
|
||||||
|
let mut searcher = Searcher::new();
|
||||||
|
let mut new_vector_id_docids = Vec::new();
|
||||||
|
|
||||||
|
for result in vector_id_docid.iter(self.wtxn)? {
|
||||||
|
let (vector_id, docid) = result?;
|
||||||
|
if !self.to_delete_docids.contains(docid.get()) {
|
||||||
|
let vector = current_hnsw.get_point(vector_id.get() as usize).clone();
|
||||||
|
let vector_id = new_hnsw.insert(vector, &mut searcher);
|
||||||
|
new_vector_id_docids.push((vector_id as u32, docid));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
vector_id_docid.clear(self.wtxn)?;
|
||||||
|
for (vector_id, docid) in new_vector_id_docids {
|
||||||
|
vector_id_docid.put(self.wtxn, &BEU32::new(vector_id), &docid)?;
|
||||||
|
}
|
||||||
|
self.index.put_vector_hnsw(self.wtxn, &new_hnsw)?;
|
||||||
|
}
|
||||||
|
|
||||||
self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;
|
self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;
|
||||||
|
|
||||||
Ok(DetailedDocumentDeletionResult {
|
Ok(DetailedDocumentDeletionResult {
|
||||||
|
Loading…
Reference in New Issue
Block a user