mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 12:27:13 +02:00
Move back to the hnsw crate
This reverts commit 7a4b6c065482f988b01298642f4c18775503f92f.
This commit is contained in:
parent
aca305bb77
commit
c79e82c62a
9 changed files with 101 additions and 60 deletions
|
@ -8,11 +8,12 @@ use charabia::{Language, Script};
|
|||
use heed::flags::Flags;
|
||||
use heed::types::*;
|
||||
use heed::{CompactionOption, Database, PolyDatabase, RoTxn, RwTxn};
|
||||
use rand_pcg::Pcg32;
|
||||
use roaring::RoaringBitmap;
|
||||
use rstar::RTree;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::distance::Euclidean;
|
||||
use crate::dot_product::DotProduct;
|
||||
use crate::error::{InternalError, UserError};
|
||||
use crate::facet::FacetType;
|
||||
use crate::fields_ids_map::FieldsIdsMap;
|
||||
|
@ -27,8 +28,8 @@ use crate::{
|
|||
Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32,
|
||||
};
|
||||
|
||||
/// The HGG data-structure that we serialize, fill and search in.
|
||||
pub type Hgg = hgg::Hgg<Euclidean, Vec<f32>, DocumentId>;
|
||||
/// The HNSW data-structure that we serialize, fill and search in.
|
||||
pub type Hnsw = hnsw::Hnsw<DotProduct, Vec<f32>, Pcg32, 12, 24>;
|
||||
|
||||
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
|
||||
pub const DEFAULT_MIN_WORD_LEN_TWO_TYPOS: u8 = 9;
|
||||
|
@ -46,7 +47,7 @@ pub mod main_key {
|
|||
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
||||
pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
|
||||
pub const GEO_RTREE_KEY: &str = "geo-rtree";
|
||||
pub const VECTOR_HGG_KEY: &str = "vector-hgg";
|
||||
pub const VECTOR_HNSW_KEY: &str = "vector-hnsw";
|
||||
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
|
||||
pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
|
||||
pub const PRIMARY_KEY_KEY: &str = "primary-key";
|
||||
|
@ -91,6 +92,7 @@ pub mod db_name {
|
|||
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
|
||||
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
|
||||
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
|
||||
pub const VECTOR_ID_DOCID: &str = "vector-id-docids";
|
||||
pub const DOCUMENTS: &str = "documents";
|
||||
pub const SCRIPT_LANGUAGE_DOCIDS: &str = "script_language_docids";
|
||||
}
|
||||
|
@ -154,6 +156,9 @@ pub struct Index {
|
|||
/// Maps the document id, the facet field id and the strings.
|
||||
pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,
|
||||
|
||||
/// Maps a vector id to the document id that have it.
|
||||
pub vector_id_docid: Database<OwnedType<BEU32>, OwnedType<BEU32>>,
|
||||
|
||||
/// Maps the document id to the document as an obkv store.
|
||||
pub(crate) documents: Database<OwnedType<BEU32>, ObkvCodec>,
|
||||
}
|
||||
|
@ -167,7 +172,7 @@ impl Index {
|
|||
) -> Result<Index> {
|
||||
use db_name::*;
|
||||
|
||||
options.max_dbs(23);
|
||||
options.max_dbs(24);
|
||||
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
|
||||
|
||||
let env = options.open(path)?;
|
||||
|
@ -207,6 +212,7 @@ impl Index {
|
|||
env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?;
|
||||
let field_id_docid_facet_strings =
|
||||
env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_STRINGS))?;
|
||||
let vector_id_docid = env.create_database(&mut wtxn, Some(VECTOR_ID_DOCID))?;
|
||||
let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
|
||||
wtxn.commit()?;
|
||||
|
||||
|
@ -235,6 +241,7 @@ impl Index {
|
|||
facet_id_is_empty_docids,
|
||||
field_id_docid_facet_f64s,
|
||||
field_id_docid_facet_strings,
|
||||
vector_id_docid,
|
||||
documents,
|
||||
})
|
||||
}
|
||||
|
@ -506,22 +513,22 @@ impl Index {
|
|||
}
|
||||
}
|
||||
|
||||
/* vector HGG */
|
||||
/* vector HNSW */
|
||||
|
||||
/// Writes the provided `hgg`.
|
||||
pub(crate) fn put_vector_hgg(&self, wtxn: &mut RwTxn, hgg: &Hgg) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<Hgg>>(wtxn, main_key::VECTOR_HGG_KEY, hgg)
|
||||
/// Writes the provided `hnsw`.
|
||||
pub(crate) fn put_vector_hnsw(&self, wtxn: &mut RwTxn, hnsw: &Hnsw) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<Hnsw>>(wtxn, main_key::VECTOR_HNSW_KEY, hnsw)
|
||||
}
|
||||
|
||||
/// Delete the `hgg`.
|
||||
pub(crate) fn delete_vector_hgg(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::VECTOR_HGG_KEY)
|
||||
/// Delete the `hnsw`.
|
||||
pub(crate) fn delete_vector_hnsw(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::VECTOR_HNSW_KEY)
|
||||
}
|
||||
|
||||
/// Returns the `hgg`.
|
||||
pub fn vector_hgg(&self, rtxn: &RoTxn) -> Result<Option<Hgg>> {
|
||||
match self.main.get::<_, Str, SerdeBincode<Hgg>>(rtxn, main_key::VECTOR_HGG_KEY)? {
|
||||
Some(hgg) => Ok(Some(hgg)),
|
||||
/// Returns the `hnsw`.
|
||||
pub fn vector_hnsw(&self, rtxn: &RoTxn) -> Result<Option<Hnsw>> {
|
||||
match self.main.get::<_, Str, SerdeBincode<Hnsw>>(rtxn, main_key::VECTOR_HNSW_KEY)? {
|
||||
Some(hnsw) => Ok(Some(hnsw)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue