Move to the hgg crate

This commit is contained in:
Kerollmops 2023-06-13 15:19:01 +02:00 committed by Clément Renault
parent 642b0f3a1b
commit 268a9ef416
No known key found for this signature in database
GPG key ID: 92ADA4E935E71FA4
9 changed files with 73 additions and 99 deletions

View file

@ -9,8 +9,8 @@ use charabia::{Language, Script};
use grenad::MergerBuilder;
use heed::types::ByteSlice;
use heed::RwTxn;
use hnsw::Searcher;
use roaring::RoaringBitmap;
use space::KnnInsert;
use super::helpers::{
self, merge_ignore_values, serialize_roaring_bitmap, valid_lmdb_key, CursorClonableMmap,
@ -19,7 +19,7 @@ use super::{ClonableMmap, MergeFn};
use crate::facet::FacetType;
use crate::update::facet::FacetsUpdate;
use crate::update::index_documents::helpers::as_cloneable_grenad;
use crate::{lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result, BEU32};
use crate::{lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result};
pub(crate) enum TypedChunk {
FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
@ -225,19 +225,16 @@ pub(crate) fn write_typed_chunk_into_index(
index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
}
TypedChunk::VectorPoints(vector_points) => {
let mut hnsw = index.vector_hnsw(wtxn)?.unwrap_or_default();
let mut searcher = Searcher::new();
let mut hgg = index.vector_hgg(wtxn)?.unwrap_or_default();
let mut cursor = vector_points.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
// convert the key back to a u32 (4 bytes)
let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();
// convert the vector back to a Vec<f32>
let vector: Vec<f32> = pod_collect_to_vec(value);
let vector_id = hnsw.insert(vector, &mut searcher) as u32;
index.vector_id_docid.put(wtxn, &BEU32::new(vector_id), &BEU32::new(docid))?;
// convert the vector back to a Vec<f32> and insert it.
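// TODO(review): the original note reads "enable again when the library is fixed", but the
// insert below is active; state which step is still disabled (and which library fix it
// waits on), or drop this note.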
hgg.insert(pod_collect_to_vec(value), docid);
}
index.put_vector_hnsw(wtxn, &hnsw)?;
index.put_vector_hgg(wtxn, &hgg)?;
}
TypedChunk::ScriptLanguageDocids(hash_pair) => {
let mut buffer = Vec::new();