mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Move back to the hnsw crate
This reverts commit 7a4b6c065482f988b01298642f4c18775503f92f.
This commit is contained in:
parent
aca305bb77
commit
c79e82c62a
9 changed files with 101 additions and 60 deletions
|
@ -39,6 +39,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||
facet_id_is_empty_docids,
|
||||
field_id_docid_facet_f64s,
|
||||
field_id_docid_facet_strings,
|
||||
vector_id_docid,
|
||||
documents,
|
||||
} = self.index;
|
||||
|
||||
|
@ -57,7 +58,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||
self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
|
||||
self.index.delete_geo_rtree(self.wtxn)?;
|
||||
self.index.delete_geo_faceted_documents_ids(self.wtxn)?;
|
||||
self.index.delete_vector_hgg(self.wtxn)?;
|
||||
self.index.delete_vector_hnsw(self.wtxn)?;
|
||||
|
||||
// We clean all the faceted documents ids.
|
||||
for field_id in faceted_fields {
|
||||
|
@ -96,6 +97,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||
facet_id_string_docids.clear(self.wtxn)?;
|
||||
field_id_docid_facet_f64s.clear(self.wtxn)?;
|
||||
field_id_docid_facet_strings.clear(self.wtxn)?;
|
||||
vector_id_docid.clear(self.wtxn)?;
|
||||
documents.clear(self.wtxn)?;
|
||||
|
||||
Ok(number_of_documents)
|
||||
|
|
|
@ -240,6 +240,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||
facet_id_exists_docids,
|
||||
facet_id_is_null_docids,
|
||||
facet_id_is_empty_docids,
|
||||
vector_id_docid,
|
||||
documents,
|
||||
} = self.index;
|
||||
// Remove from the documents database
|
||||
|
@ -274,8 +275,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||
&mut words_to_delete,
|
||||
)?;
|
||||
|
||||
todo!("delete the documents from the Hgg datastructure");
|
||||
|
||||
// We construct an FST set that contains the words to delete from the words FST.
|
||||
let words_to_delete = fst::Set::from_iter(words_to_delete.difference(&words_to_keep))?;
|
||||
|
||||
|
|
|
@ -9,8 +9,8 @@ use charabia::{Language, Script};
|
|||
use grenad::MergerBuilder;
|
||||
use heed::types::ByteSlice;
|
||||
use heed::RwTxn;
|
||||
use hnsw::Searcher;
|
||||
use roaring::RoaringBitmap;
|
||||
use space::KnnInsert;
|
||||
|
||||
use super::helpers::{
|
||||
self, merge_ignore_values, serialize_roaring_bitmap, valid_lmdb_key, CursorClonableMmap,
|
||||
|
@ -19,7 +19,7 @@ use super::{ClonableMmap, MergeFn};
|
|||
use crate::facet::FacetType;
|
||||
use crate::update::facet::FacetsUpdate;
|
||||
use crate::update::index_documents::helpers::as_cloneable_grenad;
|
||||
use crate::{lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result};
|
||||
use crate::{lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result, BEU32};
|
||||
|
||||
pub(crate) enum TypedChunk {
|
||||
FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
|
||||
|
@ -225,17 +225,20 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||
index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
|
||||
}
|
||||
TypedChunk::VectorPoints(vector_points) => {
|
||||
let mut hgg = index.vector_hgg(wtxn)?.unwrap_or_default();
|
||||
let mut hnsw = index.vector_hnsw(wtxn)?.unwrap_or_default();
|
||||
let mut searcher = Searcher::new();
|
||||
|
||||
let mut cursor = vector_points.into_cursor()?;
|
||||
while let Some((key, value)) = cursor.move_on_next()? {
|
||||
// convert the key back to a u32 (4 bytes)
|
||||
let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();
|
||||
// convert the vector back to a Vec<f32> and insert it.
|
||||
// TODO enable again when the library is fixed
|
||||
hgg.insert(pod_collect_to_vec(value), docid);
|
||||
// convert the vector back to a Vec<f32>
|
||||
let vector: Vec<f32> = pod_collect_to_vec(value);
|
||||
let vector_id = hnsw.insert(vector, &mut searcher) as u32;
|
||||
index.vector_id_docid.put(wtxn, &BEU32::new(vector_id), &BEU32::new(docid))?;
|
||||
}
|
||||
log::debug!("There are {} entries in the HGG so far", hgg.len());
|
||||
index.put_vector_hgg(wtxn, &hgg)?;
|
||||
log::debug!("There are {} entries in the HNSW so far", hnsw.len());
|
||||
index.put_vector_hnsw(wtxn, &hnsw)?;
|
||||
}
|
||||
TypedChunk::ScriptLanguageDocids(hash_pair) => {
|
||||
let mut buffer = Vec::new();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue