Normalize the facet values for search

This commit is contained in:
Clément Renault 2023-07-20 17:57:07 +02:00
parent 3070a20580
commit df528b41d8
No known key found for this signature in database
GPG key ID: 92ADA4E935E71FA4
12 changed files with 178 additions and 46 deletions

View file

@ -1,5 +1,5 @@
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeSet, HashMap, HashSet};
use std::fs::File;
use std::mem::size_of;
use std::path::Path;
@ -21,7 +21,9 @@ use crate::heed_codec::facet::{
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
FieldIdCodec, OrderedF64Codec,
};
use crate::heed_codec::{FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
use crate::heed_codec::{
BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec,
};
use crate::readable_slices::ReadableSlices;
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
@ -96,6 +98,7 @@ pub mod db_name {
pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
pub const FACET_ID_NORMALIZED_STRING_STRINGS: &str = "facet-id-normalized-string-strings";
pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst";
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
@ -157,6 +160,8 @@ pub struct Index {
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
/// Maps the facet field id and ranges of strings with the docids that correspond to them.
pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
/// Maps the facet field id and a normalized-for-search facet string to the set of original strings it was derived from.
pub facet_id_normalized_string_strings: Database<BEU16StrCodec, SerdeJson<BTreeSet<String>>>,
/// Maps the facet field id of the string facets with an FST containing all the facet values.
pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>,
@ -181,7 +186,7 @@ impl Index {
) -> Result<Index> {
use db_name::*;
options.max_dbs(24);
options.max_dbs(25);
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
let env = options.open(path)?;
@ -211,6 +216,8 @@ impl Index {
let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?;
let facet_id_string_docids =
env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?;
let facet_id_normalized_string_strings =
env.create_database(&mut wtxn, Some(FACET_ID_NORMALIZED_STRING_STRINGS))?;
let facet_id_string_fst = env.create_database(&mut wtxn, Some(FACET_ID_STRING_FST))?;
let facet_id_exists_docids =
env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?;
@ -246,6 +253,7 @@ impl Index {
field_id_word_count_docids,
facet_id_f64_docids,
facet_id_string_docids,
facet_id_normalized_string_strings,
facet_id_string_fst,
facet_id_exists_docids,
facet_id_is_null_docids,