mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 04:17:10 +02:00
Merge branch 'main' into settings-customizing-tokenization
This commit is contained in:
commit
4a21fecf67
166 changed files with 2252 additions and 1072 deletions
|
@ -1,20 +1,36 @@
|
|||
use std::ops;
|
||||
|
||||
use instant_distance::Point;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use space::Metric;
|
||||
|
||||
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
|
||||
pub struct DotProduct;
|
||||
use crate::normalize_vector;
|
||||
|
||||
impl Metric<Vec<f32>> for DotProduct {
|
||||
type Unit = u32;
|
||||
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
|
||||
pub struct NDotProductPoint(Vec<f32>);
|
||||
|
||||
// Following <https://docs.rs/space/0.17.0/space/trait.Metric.html>.
|
||||
//
|
||||
// Here is a playground that validate the ordering of the bit representation of floats in range 0.0..=1.0:
|
||||
// <https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=6c59e31a3cc5036b32edf51e8937b56e>
|
||||
fn distance(&self, a: &Vec<f32>, b: &Vec<f32>) -> Self::Unit {
|
||||
let dist = 1.0 - dot_product_similarity(a, b);
|
||||
impl NDotProductPoint {
|
||||
pub fn new(point: Vec<f32>) -> Self {
|
||||
NDotProductPoint(normalize_vector(point))
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> Vec<f32> {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Deref for NDotProductPoint {
|
||||
type Target = [f32];
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.0.as_slice()
|
||||
}
|
||||
}
|
||||
|
||||
impl Point for NDotProductPoint {
|
||||
fn distance(&self, other: &Self) -> f32 {
|
||||
let dist = 1.0 - dot_product_similarity(&self.0, &other.0);
|
||||
debug_assert!(!dist.is_nan());
|
||||
dist.to_bits()
|
||||
dist
|
||||
}
|
||||
}
|
||||
|
||||
|
|
27
milli/src/heed_codec/beu16_str_codec.rs
Normal file
27
milli/src/heed_codec/beu16_str_codec.rs
Normal file
|
@ -0,0 +1,27 @@
|
|||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::str;
|
||||
|
||||
pub struct BEU16StrCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for BEU16StrCodec {
|
||||
type DItem = (u16, &'a str);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (n_bytes, str_bytes) = bytes.split_at(2);
|
||||
let n = n_bytes.try_into().map(u16::from_be_bytes).ok()?;
|
||||
let s = str::from_utf8(str_bytes).ok()?;
|
||||
Some((n, s))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for BEU16StrCodec {
|
||||
type EItem = (u16, &'a str);
|
||||
|
||||
fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(s.len() + 2);
|
||||
bytes.extend_from_slice(&n.to_be_bytes());
|
||||
bytes.extend_from_slice(s.as_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
|
@ -1,3 +1,4 @@
|
|||
mod beu16_str_codec;
|
||||
mod beu32_str_codec;
|
||||
mod byte_slice_ref;
|
||||
pub mod facet;
|
||||
|
@ -14,6 +15,7 @@ mod str_str_u8_codec;
|
|||
pub use byte_slice_ref::ByteSliceRefCodec;
|
||||
pub use str_ref::StrRefCodec;
|
||||
|
||||
pub use self::beu16_str_codec::BEU16StrCodec;
|
||||
pub use self::beu32_str_codec::BEU32StrCodec;
|
||||
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
|
||||
pub use self::fst_set_codec::FstSetCodec;
|
||||
|
|
|
@ -8,12 +8,11 @@ use charabia::{Language, Script};
|
|||
use heed::flags::Flags;
|
||||
use heed::types::*;
|
||||
use heed::{CompactionOption, Database, PolyDatabase, RoTxn, RwTxn};
|
||||
use rand_pcg::Pcg32;
|
||||
use roaring::RoaringBitmap;
|
||||
use rstar::RTree;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::distance::DotProduct;
|
||||
use crate::distance::NDotProductPoint;
|
||||
use crate::error::{InternalError, UserError};
|
||||
use crate::facet::FacetType;
|
||||
use crate::fields_ids_map::FieldsIdsMap;
|
||||
|
@ -21,7 +20,9 @@ use crate::heed_codec::facet::{
|
|||
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
||||
FieldIdCodec, OrderedF64Codec,
|
||||
};
|
||||
use crate::heed_codec::{FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
|
||||
use crate::heed_codec::{
|
||||
BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec,
|
||||
};
|
||||
use crate::readable_slices::ReadableSlices;
|
||||
use crate::{
|
||||
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
||||
|
@ -31,7 +32,7 @@ use crate::{
|
|||
};
|
||||
|
||||
/// The HNSW data-structure that we serialize, fill and search in.
|
||||
pub type Hnsw = hnsw::Hnsw<DotProduct, Vec<f32>, Pcg32, 12, 24>;
|
||||
pub type Hnsw = instant_distance::Hnsw<NDotProductPoint>;
|
||||
|
||||
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
|
||||
pub const DEFAULT_MIN_WORD_LEN_TWO_TYPOS: u8 = 9;
|
||||
|
@ -100,6 +101,7 @@ pub mod db_name {
|
|||
pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
|
||||
pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
|
||||
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
|
||||
pub const FACET_ID_NORMALIZED_STRING_STRINGS: &str = "facet-id-normalized-string-strings";
|
||||
pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst";
|
||||
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
|
||||
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
|
||||
|
@ -161,6 +163,8 @@ pub struct Index {
|
|||
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||
/// Maps the facet field id and ranges of strings with the docids that corresponds to them.
|
||||
pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
|
||||
/// Maps the facet field id of the normalized-for-search string facets with their original versions.
|
||||
pub facet_id_normalized_string_strings: Database<BEU16StrCodec, SerdeJson<BTreeSet<String>>>,
|
||||
/// Maps the facet field id of the string facets with an FST containing all the facets values.
|
||||
pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>,
|
||||
|
||||
|
@ -185,7 +189,7 @@ impl Index {
|
|||
) -> Result<Index> {
|
||||
use db_name::*;
|
||||
|
||||
options.max_dbs(24);
|
||||
options.max_dbs(25);
|
||||
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
|
||||
|
||||
let env = options.open(path)?;
|
||||
|
@ -215,6 +219,8 @@ impl Index {
|
|||
let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?;
|
||||
let facet_id_string_docids =
|
||||
env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?;
|
||||
let facet_id_normalized_string_strings =
|
||||
env.create_database(&mut wtxn, Some(FACET_ID_NORMALIZED_STRING_STRINGS))?;
|
||||
let facet_id_string_fst = env.create_database(&mut wtxn, Some(FACET_ID_STRING_FST))?;
|
||||
let facet_id_exists_docids =
|
||||
env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?;
|
||||
|
@ -250,6 +256,7 @@ impl Index {
|
|||
field_id_word_count_docids,
|
||||
facet_id_f64_docids,
|
||||
facet_id_string_docids,
|
||||
facet_id_normalized_string_strings,
|
||||
facet_id_string_fst,
|
||||
facet_id_exists_docids,
|
||||
facet_id_is_null_docids,
|
||||
|
|
|
@ -51,9 +51,10 @@ pub use self::error::{
|
|||
pub use self::external_documents_ids::ExternalDocumentsIds;
|
||||
pub use self::fields_ids_map::FieldsIdsMap;
|
||||
pub use self::heed_codec::{
|
||||
BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapCodec,
|
||||
CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec, RoaringBitmapCodec,
|
||||
RoaringBitmapLenCodec, StrBEU32Codec, U8StrStrCodec, UncheckedU8StrStrCodec,
|
||||
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
|
||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
|
||||
RoaringBitmapCodec, RoaringBitmapLenCodec, StrBEU32Codec, U8StrStrCodec,
|
||||
UncheckedU8StrStrCodec,
|
||||
};
|
||||
pub use self::index::Index;
|
||||
pub use self::search::{
|
||||
|
|
|
@ -10,7 +10,7 @@ pub enum ScoreDetails {
|
|||
Fid(Rank),
|
||||
Position(Rank),
|
||||
ExactAttribute(ExactAttribute),
|
||||
Exactness(Rank),
|
||||
ExactWords(ExactWords),
|
||||
Sort(Sort),
|
||||
GeoSort(GeoSort),
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ impl ScoreDetails {
|
|||
ScoreDetails::Fid(details) => Some(*details),
|
||||
ScoreDetails::Position(details) => Some(*details),
|
||||
ScoreDetails::ExactAttribute(details) => Some(details.rank()),
|
||||
ScoreDetails::Exactness(details) => Some(*details),
|
||||
ScoreDetails::ExactWords(details) => Some(details.rank()),
|
||||
ScoreDetails::Sort(_) => None,
|
||||
ScoreDetails::GeoSort(_) => None,
|
||||
}
|
||||
|
@ -84,7 +84,7 @@ impl ScoreDetails {
|
|||
// For now, fid is a virtual rule always followed by the "position" rule
|
||||
let fid_details = serde_json::json!({
|
||||
"order": order,
|
||||
"attribute_ranking_order_score": fid.local_score(),
|
||||
"attributeRankingOrderScore": fid.local_score(),
|
||||
});
|
||||
details_map.insert("attribute".into(), fid_details);
|
||||
order += 1;
|
||||
|
@ -102,7 +102,7 @@ impl ScoreDetails {
|
|||
};
|
||||
|
||||
attribute_details
|
||||
.insert("query_word_distance_score".into(), position.local_score().into());
|
||||
.insert("queryWordDistanceScore".into(), position.local_score().into());
|
||||
let score = Rank::global_score([fid_details, *position].iter().copied());
|
||||
attribute_details.insert("score".into(), score.into());
|
||||
|
||||
|
@ -117,7 +117,7 @@ impl ScoreDetails {
|
|||
details_map.insert("exactness".into(), exactness_details);
|
||||
order += 1;
|
||||
}
|
||||
ScoreDetails::Exactness(details) => {
|
||||
ScoreDetails::ExactWords(details) => {
|
||||
// For now, exactness is a virtual rule always preceded by the "ExactAttribute" rule
|
||||
let exactness_details = details_map
|
||||
.get_mut("exactness")
|
||||
|
@ -129,9 +129,16 @@ impl ScoreDetails {
|
|||
== &serde_json::json!(ExactAttribute::NoExactMatch)
|
||||
{
|
||||
let score = Rank::global_score(
|
||||
[ExactAttribute::NoExactMatch.rank(), *details].iter().copied(),
|
||||
[ExactAttribute::NoExactMatch.rank(), details.rank()].iter().copied(),
|
||||
);
|
||||
*exactness_details.get_mut("score").expect("missing score") = score.into();
|
||||
// tiny detail, but we want the score to be the last displayed field,
|
||||
// so we're removing it here, adding the other fields, then adding the new score
|
||||
exactness_details.remove("score");
|
||||
exactness_details
|
||||
.insert("matchingWords".into(), details.matching_words.into());
|
||||
exactness_details
|
||||
.insert("maxMatchingWords".into(), details.max_matching_words.into());
|
||||
exactness_details.insert("score".into(), score.into());
|
||||
}
|
||||
// do not update the order since this was already done by exactAttribute
|
||||
}
|
||||
|
@ -209,8 +216,34 @@ impl Words {
|
|||
Rank { rank: self.matching_words, max_rank: self.max_matching_words }
|
||||
}
|
||||
|
||||
pub(crate) fn from_rank(rank: Rank) -> Words {
|
||||
Words { matching_words: rank.rank, max_matching_words: rank.max_rank }
|
||||
pub(crate) fn from_rank(rank: Rank) -> Self {
|
||||
Self { matching_words: rank.rank, max_matching_words: rank.max_rank }
|
||||
}
|
||||
}
|
||||
|
||||
/// Structure that is super similar to [`Words`], but whose semantics is a bit distinct.
|
||||
///
|
||||
/// In exactness, the number of matching words can actually be 0 with a non-zero score,
|
||||
/// if no words from the query appear exactly in the document.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct ExactWords {
|
||||
pub matching_words: u32,
|
||||
pub max_matching_words: u32,
|
||||
}
|
||||
|
||||
impl ExactWords {
|
||||
pub fn rank(&self) -> Rank {
|
||||
// 0 matching words means last rank (1)
|
||||
Rank { rank: self.matching_words + 1, max_rank: self.max_matching_words + 1 }
|
||||
}
|
||||
|
||||
pub(crate) fn from_rank(rank: Rank) -> Self {
|
||||
// last rank (1) means that 0 words from the query appear exactly in the document.
|
||||
// first rank (max_rank) means that (max_rank - 1) words from the query appear exactly in the document.
|
||||
Self {
|
||||
matching_words: rank.rank.saturating_sub(1),
|
||||
max_matching_words: rank.max_rank.saturating_sub(1),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -223,7 +256,7 @@ pub struct Typo {
|
|||
impl Typo {
|
||||
pub fn rank(&self) -> Rank {
|
||||
Rank {
|
||||
rank: self.max_typo_count - self.typo_count + 1,
|
||||
rank: (self.max_typo_count + 1).saturating_sub(self.typo_count),
|
||||
max_rank: (self.max_typo_count + 1),
|
||||
}
|
||||
}
|
||||
|
@ -236,7 +269,10 @@ impl Typo {
|
|||
// rank + typo = max_rank
|
||||
// typo = max_rank - rank
|
||||
pub fn from_rank(rank: Rank) -> Typo {
|
||||
Typo { typo_count: rank.max_rank - rank.rank, max_typo_count: rank.max_rank - 1 }
|
||||
Typo {
|
||||
typo_count: rank.max_rank.saturating_sub(rank.rank),
|
||||
max_typo_count: rank.max_rank.saturating_sub(1),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
use std::fmt;
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use charabia::normalizer::NormalizerOption;
|
||||
use charabia::Normalize;
|
||||
use fst::automaton::{Automaton, Str};
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
|
||||
|
@ -14,8 +17,8 @@ use crate::error::UserError;
|
|||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::{
|
||||
execute_search, normalize_facet, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index,
|
||||
Result, SearchContext, BEU16,
|
||||
execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result,
|
||||
SearchContext, BEU16,
|
||||
};
|
||||
|
||||
// Building these factories is not free.
|
||||
|
@ -301,29 +304,28 @@ impl<'a> SearchForFacetValues<'a> {
|
|||
|
||||
match self.query.as_ref() {
|
||||
Some(query) => {
|
||||
let query = normalize_facet(query);
|
||||
let query = query.as_str();
|
||||
let options = NormalizerOption { lossy: true, ..Default::default() };
|
||||
let query = query.normalize(&options);
|
||||
let query = query.as_ref();
|
||||
|
||||
let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
|
||||
let field_authorizes_typos =
|
||||
!self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid);
|
||||
|
||||
if authorize_typos && field_authorizes_typos {
|
||||
let mut results = vec![];
|
||||
|
||||
let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
|
||||
if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
|
||||
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: query };
|
||||
if let Some(FacetGroupValue { bitmap, .. }) =
|
||||
index.facet_id_string_docids.get(rtxn, &key)?
|
||||
{
|
||||
let count = search_candidates.intersection_len(&bitmap);
|
||||
if count != 0 {
|
||||
let value = self
|
||||
.one_original_value_of(fid, query, bitmap.min().unwrap())?
|
||||
.unwrap_or_else(|| query.to_string());
|
||||
results.push(FacetValueHit { value, count });
|
||||
}
|
||||
let mut results = vec![];
|
||||
if fst.contains(query) {
|
||||
self.fetch_original_facets_using_normalized(
|
||||
fid,
|
||||
query,
|
||||
query,
|
||||
&search_candidates,
|
||||
&mut results,
|
||||
)?;
|
||||
}
|
||||
Ok(results)
|
||||
} else {
|
||||
let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?;
|
||||
let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?;
|
||||
|
@ -338,60 +340,41 @@ impl<'a> SearchForFacetValues<'a> {
|
|||
};
|
||||
|
||||
let mut stream = fst.search(automaton).into_stream();
|
||||
let mut length = 0;
|
||||
let mut results = vec![];
|
||||
while let Some(facet_value) = stream.next() {
|
||||
let value = std::str::from_utf8(facet_value)?;
|
||||
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
|
||||
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
|
||||
Some(FacetGroupValue { bitmap, .. }) => bitmap,
|
||||
None => {
|
||||
error!(
|
||||
"the facet value is missing from the facet database: {key:?}"
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let count = search_candidates.intersection_len(&docids);
|
||||
if count != 0 {
|
||||
let value = self
|
||||
.one_original_value_of(fid, value, docids.min().unwrap())?
|
||||
.unwrap_or_else(|| query.to_string());
|
||||
results.push(FacetValueHit { value, count });
|
||||
length += 1;
|
||||
}
|
||||
if length >= MAX_NUMBER_OF_FACETS {
|
||||
if self
|
||||
.fetch_original_facets_using_normalized(
|
||||
fid,
|
||||
value,
|
||||
query,
|
||||
&search_candidates,
|
||||
&mut results,
|
||||
)?
|
||||
.is_break()
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
Ok(results)
|
||||
}
|
||||
} else {
|
||||
let automaton = Str::new(query).starts_with();
|
||||
let mut stream = fst.search(automaton).into_stream();
|
||||
let mut results = vec![];
|
||||
let mut length = 0;
|
||||
while let Some(facet_value) = stream.next() {
|
||||
let value = std::str::from_utf8(facet_value)?;
|
||||
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
|
||||
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
|
||||
Some(FacetGroupValue { bitmap, .. }) => bitmap,
|
||||
None => {
|
||||
error!(
|
||||
"the facet value is missing from the facet database: {key:?}"
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let count = search_candidates.intersection_len(&docids);
|
||||
if count != 0 {
|
||||
let value = self
|
||||
.one_original_value_of(fid, value, docids.min().unwrap())?
|
||||
.unwrap_or_else(|| query.to_string());
|
||||
results.push(FacetValueHit { value, count });
|
||||
length += 1;
|
||||
}
|
||||
if length >= MAX_NUMBER_OF_FACETS {
|
||||
if self
|
||||
.fetch_original_facets_using_normalized(
|
||||
fid,
|
||||
value,
|
||||
query,
|
||||
&search_candidates,
|
||||
&mut results,
|
||||
)?
|
||||
.is_break()
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -401,7 +384,6 @@ impl<'a> SearchForFacetValues<'a> {
|
|||
}
|
||||
None => {
|
||||
let mut results = vec![];
|
||||
let mut length = 0;
|
||||
let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
|
||||
for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
|
||||
let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
|
||||
|
@ -412,9 +394,8 @@ impl<'a> SearchForFacetValues<'a> {
|
|||
.one_original_value_of(fid, left_bound, bitmap.min().unwrap())?
|
||||
.unwrap_or_else(|| left_bound.to_string());
|
||||
results.push(FacetValueHit { value, count });
|
||||
length += 1;
|
||||
}
|
||||
if length >= MAX_NUMBER_OF_FACETS {
|
||||
if results.len() >= MAX_NUMBER_OF_FACETS {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -422,6 +403,50 @@ impl<'a> SearchForFacetValues<'a> {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn fetch_original_facets_using_normalized(
|
||||
&self,
|
||||
fid: FieldId,
|
||||
value: &str,
|
||||
query: &str,
|
||||
search_candidates: &RoaringBitmap,
|
||||
results: &mut Vec<FacetValueHit>,
|
||||
) -> Result<ControlFlow<()>> {
|
||||
let index = self.search_query.index;
|
||||
let rtxn = self.search_query.rtxn;
|
||||
|
||||
let database = index.facet_id_normalized_string_strings;
|
||||
let key = (fid, value);
|
||||
let original_strings = match database.get(rtxn, &key)? {
|
||||
Some(original_strings) => original_strings,
|
||||
None => {
|
||||
error!("the facet value is missing from the facet database: {key:?}");
|
||||
return Ok(ControlFlow::Continue(()));
|
||||
}
|
||||
};
|
||||
for original in original_strings {
|
||||
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: original.as_str() };
|
||||
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
|
||||
Some(FacetGroupValue { bitmap, .. }) => bitmap,
|
||||
None => {
|
||||
error!("the facet value is missing from the facet database: {key:?}");
|
||||
return Ok(ControlFlow::Continue(()));
|
||||
}
|
||||
};
|
||||
let count = search_candidates.intersection_len(&docids);
|
||||
if count != 0 {
|
||||
let value = self
|
||||
.one_original_value_of(fid, &original, docids.min().unwrap())?
|
||||
.unwrap_or_else(|| query.to_string());
|
||||
results.push(FacetValueHit { value, count });
|
||||
}
|
||||
if results.len() >= MAX_NUMBER_OF_FACETS {
|
||||
return Ok(ControlFlow::Break(()));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ControlFlow::Continue(()))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, serde::Serialize, PartialEq)]
|
||||
|
|
|
@ -100,7 +100,7 @@ fn facet_number_values<'a>(
|
|||
}
|
||||
|
||||
/// Return an iterator over each string value in the given field of the given document.
|
||||
fn facet_string_values<'a>(
|
||||
pub fn facet_string_values<'a>(
|
||||
docid: u32,
|
||||
field_id: u16,
|
||||
index: &Index,
|
||||
|
|
|
@ -6,6 +6,7 @@ use heed::{RoPrefix, RoTxn};
|
|||
use roaring::RoaringBitmap;
|
||||
use rstar::RTree;
|
||||
|
||||
use super::facet_string_values;
|
||||
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
|
||||
use crate::heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec};
|
||||
use crate::score_details::{self, ScoreDetails};
|
||||
|
@ -157,23 +158,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
|
|||
let mut documents = self
|
||||
.geo_candidates
|
||||
.iter()
|
||||
.map(|id| -> Result<_> {
|
||||
Ok((
|
||||
id,
|
||||
[
|
||||
facet_number_values(id, lat, ctx.index, ctx.txn)?
|
||||
.next()
|
||||
.expect("A geo faceted document doesn't contain any lat")?
|
||||
.0
|
||||
.2,
|
||||
facet_number_values(id, lng, ctx.index, ctx.txn)?
|
||||
.next()
|
||||
.expect("A geo faceted document doesn't contain any lng")?
|
||||
.0
|
||||
.2,
|
||||
],
|
||||
))
|
||||
})
|
||||
.map(|id| -> Result<_> { Ok((id, geo_value(id, lat, lng, ctx.index, ctx.txn)?)) })
|
||||
.collect::<Result<Vec<(u32, [f64; 2])>>>()?;
|
||||
// computing the distance between two points is expensive thus we cache the result
|
||||
documents
|
||||
|
@ -185,6 +170,37 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Extracts the lat and long values from a single document.
|
||||
///
|
||||
/// If it is not able to find it in the facet number index it will extract it
|
||||
/// from the facet string index and parse it as f64 (as the geo extraction behaves).
|
||||
fn geo_value(
|
||||
docid: u32,
|
||||
field_lat: u16,
|
||||
field_lng: u16,
|
||||
index: &Index,
|
||||
rtxn: &RoTxn,
|
||||
) -> Result<[f64; 2]> {
|
||||
let extract_geo = |geo_field: u16| -> Result<f64> {
|
||||
match facet_number_values(docid, geo_field, index, rtxn)?.next() {
|
||||
Some(Ok(((_, _, geo), ()))) => Ok(geo),
|
||||
Some(Err(e)) => Err(e.into()),
|
||||
None => match facet_string_values(docid, geo_field, index, rtxn)?.next() {
|
||||
Some(Ok((_, geo))) => {
|
||||
Ok(geo.parse::<f64>().expect("cannot parse geo field as f64"))
|
||||
}
|
||||
Some(Err(e)) => Err(e.into()),
|
||||
None => panic!("A geo faceted document doesn't contain any lat or lng"),
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
let lat = extract_geo(field_lat)?;
|
||||
let lng = extract_geo(field_lng)?;
|
||||
|
||||
Ok([lat, lng])
|
||||
}
|
||||
|
||||
impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
|
||||
fn id(&self) -> String {
|
||||
"geo_sort".to_owned()
|
||||
|
|
|
@ -28,7 +28,7 @@ use db_cache::DatabaseCache;
|
|||
use exact_attribute::ExactAttribute;
|
||||
use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
|
||||
use heed::RoTxn;
|
||||
use hnsw::Searcher;
|
||||
use instant_distance::Search;
|
||||
use interner::{DedupInterner, Interner};
|
||||
pub use logger::visual::VisualSearchLogger;
|
||||
pub use logger::{DefaultSearchLogger, SearchLogger};
|
||||
|
@ -40,18 +40,18 @@ use ranking_rules::{
|
|||
use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache};
|
||||
use roaring::RoaringBitmap;
|
||||
use sort::Sort;
|
||||
use space::Neighbor;
|
||||
|
||||
use self::distinct::facet_string_values;
|
||||
use self::geo_sort::GeoSort;
|
||||
pub use self::geo_sort::Strategy as GeoSortStrategy;
|
||||
use self::graph_based_ranking_rule::Words;
|
||||
use self::interner::Interned;
|
||||
use crate::distance::NDotProductPoint;
|
||||
use crate::error::FieldIdMapMissingEntry;
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::search::new::distinct::apply_distinct_rule;
|
||||
use crate::{
|
||||
normalize_vector, AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy,
|
||||
UserError, BEU32,
|
||||
AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, BEU32,
|
||||
};
|
||||
|
||||
/// A structure used throughout the execution of a search query.
|
||||
|
@ -85,7 +85,12 @@ impl<'ctx> SearchContext<'ctx> {
|
|||
let searchable_names = self.index.searchable_fields(self.txn)?;
|
||||
|
||||
let mut restricted_fids = Vec::new();
|
||||
let mut contains_wildcard = false;
|
||||
for field_name in searchable_attributes {
|
||||
if field_name == "*" {
|
||||
contains_wildcard = true;
|
||||
continue;
|
||||
}
|
||||
let searchable_contains_name =
|
||||
searchable_names.as_ref().map(|sn| sn.iter().any(|name| name == field_name));
|
||||
let fid = match (fids_map.id(field_name), searchable_contains_name) {
|
||||
|
@ -99,8 +104,10 @@ impl<'ctx> SearchContext<'ctx> {
|
|||
}
|
||||
.into())
|
||||
}
|
||||
// The field is not searchable, but the searchableAttributes are set to * => ignore field
|
||||
(None, None) => continue,
|
||||
// The field is not searchable => User error
|
||||
_otherwise => {
|
||||
(_fid, Some(false)) => {
|
||||
let mut valid_fields: BTreeSet<_> =
|
||||
fids_map.names().map(String::from).collect();
|
||||
|
||||
|
@ -132,7 +139,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||
restricted_fids.push(fid);
|
||||
}
|
||||
|
||||
self.restricted_fids = Some(restricted_fids);
|
||||
self.restricted_fids = (!contains_wildcard).then_some(restricted_fids);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -437,29 +444,31 @@ pub fn execute_search(
|
|||
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
||||
|
||||
if let Some(vector) = vector {
|
||||
let mut searcher = Searcher::new();
|
||||
let hnsw = ctx.index.vector_hnsw(ctx.txn)?.unwrap_or_default();
|
||||
let ef = hnsw.len().min(100);
|
||||
let mut dest = vec![Neighbor { index: 0, distance: 0 }; ef];
|
||||
let vector = normalize_vector(vector.clone());
|
||||
let neighbors = hnsw.nearest(&vector, ef, &mut searcher, &mut dest[..]);
|
||||
let mut search = Search::default();
|
||||
let docids = match ctx.index.vector_hnsw(ctx.txn)? {
|
||||
Some(hnsw) => {
|
||||
let vector = NDotProductPoint::new(vector.clone());
|
||||
let neighbors = hnsw.search(&vector, &mut search);
|
||||
|
||||
let mut docids = Vec::new();
|
||||
let mut uniq_docids = RoaringBitmap::new();
|
||||
for Neighbor { index, distance: _ } in neighbors.iter() {
|
||||
let index = BEU32::new(*index as u32);
|
||||
let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap().get();
|
||||
if universe.contains(docid) && uniq_docids.insert(docid) {
|
||||
docids.push(docid);
|
||||
if docids.len() == (from + length) {
|
||||
break;
|
||||
let mut docids = Vec::new();
|
||||
let mut uniq_docids = RoaringBitmap::new();
|
||||
for instant_distance::Item { distance: _, pid, point: _ } in neighbors {
|
||||
let index = BEU32::new(pid.into_inner());
|
||||
let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap().get();
|
||||
if universe.contains(docid) && uniq_docids.insert(docid) {
|
||||
docids.push(docid);
|
||||
if docids.len() == (from + length) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// return the nearest documents that are also part of the candidates
|
||||
// along with a dummy list of scores that are useless in this context.
|
||||
let docids: Vec<_> = docids.into_iter().skip(from).take(length).collect();
|
||||
// return the nearest documents that are also part of the candidates
|
||||
// along with a dummy list of scores that are useless in this context.
|
||||
docids.into_iter().skip(from).take(length).collect()
|
||||
}
|
||||
None => Vec::new(),
|
||||
};
|
||||
|
||||
return Ok(PartialSearchResult {
|
||||
candidates: universe,
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{ComputedCondition, RankingRuleGraphTrait};
|
||||
use crate::score_details::{Rank, ScoreDetails};
|
||||
use crate::score_details::{self, Rank, ScoreDetails};
|
||||
use crate::search::new::interner::{DedupInterner, Interned};
|
||||
use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset};
|
||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
|
||||
|
@ -87,6 +87,6 @@ impl RankingRuleGraphTrait for ExactnessGraph {
|
|||
}
|
||||
|
||||
fn rank_to_score(rank: Rank) -> ScoreDetails {
|
||||
ScoreDetails::Exactness(rank)
|
||||
ScoreDetails::ExactWords(score_details::ExactWords::from_rank(rank))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,10 +15,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -35,10 +35,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 7,
|
||||
max_rank: 10,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 6,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -55,10 +55,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 9,
|
||||
max_rank: 9,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 8,
|
||||
max_matching_words: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -75,10 +75,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 9,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 5,
|
||||
max_matching_words: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -95,10 +95,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 7,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -115,10 +115,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 7,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -135,10 +135,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 4,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -155,10 +155,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 4,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -175,10 +175,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 6,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 5,
|
||||
max_matching_words: 5,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -195,10 +195,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 6,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 2,
|
||||
max_matching_words: 5,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -215,10 +215,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 4,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -235,10 +235,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 2,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -255,10 +255,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -275,10 +275,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 4,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -295,10 +295,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 3,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 2,
|
||||
max_matching_words: 2,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -315,10 +315,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 3,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 2,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -335,10 +335,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -355,10 +355,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
|
@ -15,10 +15,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 7,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 6,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -35,10 +35,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 7,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 6,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -55,10 +55,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 7,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 6,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -75,10 +75,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 4,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 3,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -95,10 +95,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 0,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
|
@ -15,10 +15,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 7,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -35,10 +35,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
MatchesStart,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 7,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -55,10 +55,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 7,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -75,10 +75,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 7,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 6,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -95,10 +95,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 4,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -115,10 +115,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
|
@ -15,10 +15,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 6,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 5,
|
||||
max_matching_words: 5,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -35,10 +35,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
MatchesStart,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 6,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 5,
|
||||
max_matching_words: 5,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -55,10 +55,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 6,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 5,
|
||||
max_matching_words: 5,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -75,10 +75,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 3,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 2,
|
||||
max_matching_words: 2,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
|
@ -15,10 +15,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 3,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 2,
|
||||
max_matching_words: 2,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -35,10 +35,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
MatchesStart,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 3,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 2,
|
||||
max_matching_words: 2,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -55,10 +55,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 3,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 2,
|
||||
max_matching_words: 2,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
|
@ -15,10 +15,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 4,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
|
@ -41,10 +41,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 4,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 3,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
|
@ -67,10 +67,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 4,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 3,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
|
@ -93,10 +93,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 4,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 3,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
|
@ -119,10 +119,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 2,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
|
|
|
@ -15,10 +15,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -35,10 +35,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 9,
|
||||
max_rank: 9,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 8,
|
||||
max_matching_words: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -55,10 +55,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 7,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -75,10 +75,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 7,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -95,10 +95,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 6,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 5,
|
||||
max_matching_words: 5,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -115,10 +115,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 4,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -135,10 +135,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -155,10 +155,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 3,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 2,
|
||||
max_matching_words: 2,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -175,10 +175,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
|
@ -15,10 +15,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -35,10 +35,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -55,10 +55,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 3,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 2,
|
||||
max_matching_words: 2,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -75,10 +75,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 3,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 2,
|
||||
max_matching_words: 2,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -95,10 +95,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -115,10 +115,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
|
@ -15,10 +15,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -35,10 +35,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -55,10 +55,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
MatchesStart,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -75,10 +75,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -95,10 +95,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -115,10 +115,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -135,10 +135,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
|
@ -15,10 +15,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -35,10 +35,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -55,10 +55,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
MatchesStart,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -75,10 +75,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -95,10 +95,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -115,10 +115,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -135,10 +135,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
|
@ -15,10 +15,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
|
@ -41,10 +41,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
|
@ -67,10 +67,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
|
|
|
@ -15,10 +15,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
|
@ -41,10 +41,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
|
@ -67,10 +67,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
|
@ -93,10 +93,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
MatchesStart,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 4,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
|
@ -119,10 +119,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
MatchesStart,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 4,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
|
@ -145,10 +145,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
MatchesStart,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 4,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
|
@ -171,10 +171,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 4,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
|
@ -197,10 +197,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 4,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
|
@ -223,10 +223,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 4,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
Proximity(
|
||||
|
|
|
@ -21,10 +21,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 4,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -47,10 +47,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 4,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 3,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -73,10 +73,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 4,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 3,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -99,10 +99,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 2,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
|
@ -15,10 +15,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 10,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 9,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -35,10 +35,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 7,
|
||||
max_rank: 10,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 6,
|
||||
max_matching_words: 9,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -55,10 +55,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 9,
|
||||
max_rank: 9,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 8,
|
||||
max_matching_words: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -75,10 +75,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 9,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 5,
|
||||
max_matching_words: 8,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -95,10 +95,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 7,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -115,10 +115,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 8,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 7,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -135,10 +135,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 4,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -155,10 +155,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 8,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 4,
|
||||
max_matching_words: 7,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -175,10 +175,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 6,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 5,
|
||||
max_matching_words: 5,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -195,10 +195,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 6,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 2,
|
||||
max_matching_words: 5,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -215,10 +215,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 4,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -235,10 +235,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 5,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 2,
|
||||
max_matching_words: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -255,10 +255,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -275,10 +275,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 4,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -295,10 +295,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 3,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 2,
|
||||
max_matching_words: 2,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -315,10 +315,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 3,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 2,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -335,10 +335,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -355,10 +355,10 @@ expression: "format!(\"{document_ids_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
ExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
|
@ -37,10 +37,10 @@ expression: "format!(\"{document_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -78,10 +78,10 @@ expression: "format!(\"{document_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -119,10 +119,10 @@ expression: "format!(\"{document_scores:#?}\")"
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
|
@ -120,10 +120,10 @@ fn test_ignore_stop_words() {
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -173,10 +173,10 @@ fn test_ignore_stop_words() {
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -226,10 +226,10 @@ fn test_ignore_stop_words() {
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -278,10 +278,10 @@ fn test_ignore_stop_words() {
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 3,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 2,
|
||||
max_matching_words: 2,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -337,10 +337,10 @@ fn test_stop_words_in_phrase() {
|
|||
ExactAttribute(
|
||||
MatchesStart,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -378,10 +378,10 @@ fn test_stop_words_in_phrase() {
|
|||
ExactAttribute(
|
||||
MatchesStart,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 2,
|
||||
max_rank: 2,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 1,
|
||||
max_matching_words: 1,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -430,10 +430,10 @@ fn test_stop_words_in_phrase() {
|
|||
ExactAttribute(
|
||||
NoExactMatch,
|
||||
),
|
||||
Exactness(
|
||||
Rank {
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
ExactWords(
|
||||
ExactWords {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
|
@ -36,6 +36,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||
script_language_docids,
|
||||
facet_id_f64_docids,
|
||||
facet_id_string_docids,
|
||||
facet_id_normalized_string_strings,
|
||||
facet_id_string_fst,
|
||||
facet_id_exists_docids,
|
||||
facet_id_is_null_docids,
|
||||
|
@ -94,6 +95,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||
word_prefix_fid_docids.clear(self.wtxn)?;
|
||||
script_language_docids.clear(self.wtxn)?;
|
||||
facet_id_f64_docids.clear(self.wtxn)?;
|
||||
facet_id_normalized_string_strings.clear(self.wtxn)?;
|
||||
facet_id_string_fst.clear(self.wtxn)?;
|
||||
facet_id_exists_docids.clear(self.wtxn)?;
|
||||
facet_id_is_null_docids.clear(self.wtxn)?;
|
||||
|
|
|
@ -4,10 +4,9 @@ use std::collections::{BTreeSet, HashMap, HashSet};
|
|||
use fst::IntoStreamer;
|
||||
use heed::types::{ByteSlice, DecodeIgnore, Str, UnalignedSlice};
|
||||
use heed::{BytesDecode, BytesEncode, Database, RwIter};
|
||||
use hnsw::Searcher;
|
||||
use instant_distance::PointId;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use space::KnnPoints;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::facet::delete::FacetsDelete;
|
||||
|
@ -239,6 +238,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||
word_prefix_fid_docids,
|
||||
facet_id_f64_docids: _,
|
||||
facet_id_string_docids: _,
|
||||
facet_id_normalized_string_strings: _,
|
||||
facet_id_string_fst: _,
|
||||
field_id_docid_facet_f64s: _,
|
||||
field_id_docid_facet_strings: _,
|
||||
|
@ -438,24 +438,24 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||
|
||||
// An ugly and slow way to remove the vectors from the HNSW
|
||||
// It basically reconstructs the HNSW from scratch without editing the current one.
|
||||
let current_hnsw = self.index.vector_hnsw(self.wtxn)?.unwrap_or_default();
|
||||
if !current_hnsw.is_empty() {
|
||||
let mut new_hnsw = Hnsw::default();
|
||||
let mut searcher = Searcher::new();
|
||||
let mut new_vector_id_docids = Vec::new();
|
||||
|
||||
if let Some(current_hnsw) = self.index.vector_hnsw(self.wtxn)? {
|
||||
let mut points = Vec::new();
|
||||
let mut docids = Vec::new();
|
||||
for result in vector_id_docid.iter(self.wtxn)? {
|
||||
let (vector_id, docid) = result?;
|
||||
if !self.to_delete_docids.contains(docid.get()) {
|
||||
let vector = current_hnsw.get_point(vector_id.get() as usize).clone();
|
||||
let vector_id = new_hnsw.insert(vector, &mut searcher);
|
||||
new_vector_id_docids.push((vector_id as u32, docid));
|
||||
let pid = PointId::from(vector_id.get());
|
||||
let vector = current_hnsw[pid].clone();
|
||||
points.push(vector);
|
||||
docids.push(docid);
|
||||
}
|
||||
}
|
||||
|
||||
let (new_hnsw, pids) = Hnsw::builder().build_hnsw(points);
|
||||
|
||||
vector_id_docid.clear(self.wtxn)?;
|
||||
for (vector_id, docid) in new_vector_id_docids {
|
||||
vector_id_docid.put(self.wtxn, &BEU32::new(vector_id), &docid)?;
|
||||
for (pid, docid) in pids.into_iter().zip(docids) {
|
||||
vector_id_docid.put(self.wtxn, &BEU32::new(pid.into_inner()), &docid)?;
|
||||
}
|
||||
self.index.put_vector_hnsw(self.wtxn, &new_hnsw)?;
|
||||
}
|
||||
|
|
|
@ -76,9 +76,14 @@ pub const FACET_MAX_GROUP_SIZE: u8 = 8;
|
|||
pub const FACET_GROUP_SIZE: u8 = 4;
|
||||
pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
use std::fs::File;
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use heed::types::DecodeIgnore;
|
||||
use charabia::normalizer::{Normalize, NormalizerOption};
|
||||
use grenad::{CompressionType, SortAlgorithm};
|
||||
use heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
|
||||
use heed::BytesEncode;
|
||||
use log::debug;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
|
@ -87,7 +92,9 @@ use super::FacetsUpdateBulk;
|
|||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::{Index, Result, BEU16};
|
||||
use crate::update::index_documents::create_sorter;
|
||||
use crate::update::merge_btreeset_string;
|
||||
use crate::{BEU16StrCodec, Index, Result, BEU16};
|
||||
|
||||
pub mod bulk;
|
||||
pub mod delete;
|
||||
|
@ -159,26 +166,69 @@ impl<'i> FacetsUpdate<'i> {
|
|||
incremental_update.execute(wtxn)?;
|
||||
}
|
||||
|
||||
// We clear the list of normalized-for-search facets
|
||||
// and the previous FSTs to compute everything from scratch
|
||||
self.index.facet_id_normalized_string_strings.clear(wtxn)?;
|
||||
self.index.facet_id_string_fst.clear(wtxn)?;
|
||||
|
||||
// As we can't use the same write transaction to read and write in two different databases
|
||||
// we must create a temporary sorter that we will write into LMDB afterward.
|
||||
// As multiple unnormalized facet values can become the same normalized facet value
|
||||
// we must merge them together.
|
||||
let mut sorter = create_sorter(
|
||||
SortAlgorithm::Unstable,
|
||||
merge_btreeset_string,
|
||||
CompressionType::None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
|
||||
// We iterate on the list of original, semi-normalized, facet values
|
||||
// and normalize them for search, inserting them in LMDB in any given order.
|
||||
let options = NormalizerOption { lossy: true, ..Default::default() };
|
||||
let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
|
||||
for result in database.iter(wtxn)? {
|
||||
let (facet_group_key, ()) = result?;
|
||||
if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
|
||||
let normalized_facet = left_bound.normalize(&options);
|
||||
let set = BTreeSet::from_iter(std::iter::once(left_bound));
|
||||
let key = (field_id, normalized_facet.as_ref());
|
||||
let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?;
|
||||
let val = SerdeJson::bytes_encode(&set).ok_or(heed::Error::Encoding)?;
|
||||
sorter.insert(key, val)?;
|
||||
}
|
||||
}
|
||||
|
||||
// In this loop we don't need to take care of merging bitmaps
|
||||
// as the grenad sorter already merged them for us.
|
||||
let mut merger_iter = sorter.into_stream_merger_iter()?;
|
||||
while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? {
|
||||
self.index
|
||||
.facet_id_normalized_string_strings
|
||||
.remap_types::<ByteSlice, ByteSlice>()
|
||||
.put(wtxn, key_bytes, btreeset_bytes)?;
|
||||
}
|
||||
|
||||
// We compute one FST by string facet
|
||||
let mut text_fsts = vec![];
|
||||
let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
|
||||
let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
|
||||
let database =
|
||||
self.index.facet_id_normalized_string_strings.remap_data_type::<DecodeIgnore>();
|
||||
for result in database.iter(wtxn)? {
|
||||
let (facet_group_key, _) = result?;
|
||||
if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
|
||||
current_fst = match current_fst.take() {
|
||||
Some((fid, fst_builder)) if fid != field_id => {
|
||||
let fst = fst_builder.into_set();
|
||||
text_fsts.push((fid, fst));
|
||||
Some((field_id, fst::SetBuilder::memory()))
|
||||
}
|
||||
Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
|
||||
None => Some((field_id, fst::SetBuilder::memory())),
|
||||
};
|
||||
|
||||
if let Some((_, fst_builder)) = current_fst.as_mut() {
|
||||
fst_builder.insert(left_bound)?;
|
||||
let ((field_id, normalized_facet), _) = result?;
|
||||
current_fst = match current_fst.take() {
|
||||
Some((fid, fst_builder)) if fid != field_id => {
|
||||
let fst = fst_builder.into_set();
|
||||
text_fsts.push((fid, fst));
|
||||
Some((field_id, fst::SetBuilder::memory()))
|
||||
}
|
||||
Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
|
||||
None => Some((field_id, fst::SetBuilder::memory())),
|
||||
};
|
||||
|
||||
if let Some((_, fst_builder)) = current_fst.as_mut() {
|
||||
fst_builder.insert(normalized_facet)?;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -187,9 +237,6 @@ impl<'i> FacetsUpdate<'i> {
|
|||
text_fsts.push((field_id, fst));
|
||||
}
|
||||
|
||||
// We remove all of the previous FSTs that were in this database
|
||||
self.index.facet_id_string_fst.clear(wtxn)?;
|
||||
|
||||
// We write those FSTs in LMDB now
|
||||
for (field_id, fst) in text_fsts {
|
||||
self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?;
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
use std::borrow::Cow;
|
||||
use std::collections::BTreeSet;
|
||||
use std::io;
|
||||
use std::result::Result as StdResult;
|
||||
|
||||
|
@ -44,6 +45,27 @@ pub fn merge_roaring_bitmaps<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Resul
|
|||
}
|
||||
}
|
||||
|
||||
pub fn merge_btreeset_string<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
|
||||
if values.len() == 1 {
|
||||
Ok(values[0].clone())
|
||||
} else {
|
||||
// TODO improve the perf by using a `#[borrow] Cow<str>`.
|
||||
let strings: BTreeSet<String> = values
|
||||
.iter()
|
||||
.map(AsRef::as_ref)
|
||||
.map(serde_json::from_slice::<BTreeSet<String>>)
|
||||
.map(StdResult::unwrap)
|
||||
.reduce(|mut current, new| {
|
||||
for x in new {
|
||||
current.insert(x);
|
||||
}
|
||||
current
|
||||
})
|
||||
.unwrap();
|
||||
Ok(Cow::Owned(serde_json::to_vec(&strings).unwrap()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn keep_first<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
|
||||
Ok(values[0].clone())
|
||||
}
|
||||
|
|
|
@ -13,9 +13,9 @@ pub use grenad_helpers::{
|
|||
GrenadParameters, MergeableReader,
|
||||
};
|
||||
pub use merge_functions::{
|
||||
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps,
|
||||
merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs, serialize_roaring_bitmap,
|
||||
MergeFn,
|
||||
concat_u32s_array, keep_first, keep_latest_obkv, merge_btreeset_string,
|
||||
merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs,
|
||||
serialize_roaring_bitmap, MergeFn,
|
||||
};
|
||||
|
||||
use crate::MAX_WORD_LENGTH;
|
||||
|
|
|
@ -26,7 +26,7 @@ pub use self::enrich::{
|
|||
};
|
||||
pub use self::helpers::{
|
||||
as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
|
||||
fst_stream_into_vec, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
|
||||
fst_stream_into_vec, merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
|
||||
sorter_into_lmdb_database, valid_lmdb_key, writer_into_reader, ClonableMmap, MergeFn,
|
||||
};
|
||||
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
|
||||
|
|
|
@ -9,22 +9,19 @@ use charabia::{Language, Script};
|
|||
use grenad::MergerBuilder;
|
||||
use heed::types::ByteSlice;
|
||||
use heed::RwTxn;
|
||||
use hnsw::Searcher;
|
||||
use roaring::RoaringBitmap;
|
||||
use space::KnnPoints;
|
||||
|
||||
use super::helpers::{
|
||||
self, merge_ignore_values, serialize_roaring_bitmap, valid_lmdb_key, CursorClonableMmap,
|
||||
};
|
||||
use super::{ClonableMmap, MergeFn};
|
||||
use crate::distance::NDotProductPoint;
|
||||
use crate::error::UserError;
|
||||
use crate::facet::FacetType;
|
||||
use crate::index::Hnsw;
|
||||
use crate::update::facet::FacetsUpdate;
|
||||
use crate::update::index_documents::helpers::{as_cloneable_grenad, try_split_array_at};
|
||||
use crate::{
|
||||
lat_lng_to_xyz, normalize_vector, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result,
|
||||
BEU32,
|
||||
};
|
||||
use crate::{lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result, BEU32};
|
||||
|
||||
pub(crate) enum TypedChunk {
|
||||
FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
|
||||
|
@ -292,17 +289,20 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||
index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
|
||||
}
|
||||
TypedChunk::VectorPoints(vector_points) => {
|
||||
let mut hnsw = index.vector_hnsw(wtxn)?.unwrap_or_default();
|
||||
let mut searcher = Searcher::new();
|
||||
|
||||
let mut expected_dimensions = match index.vector_id_docid.iter(wtxn)?.next() {
|
||||
Some(result) => {
|
||||
let (vector_id, _) = result?;
|
||||
Some(hnsw.get_point(vector_id.get() as usize).len())
|
||||
}
|
||||
None => None,
|
||||
let (pids, mut points): (Vec<_>, Vec<_>) = match index.vector_hnsw(wtxn)? {
|
||||
Some(hnsw) => hnsw.iter().map(|(pid, point)| (pid, point.clone())).unzip(),
|
||||
None => Default::default(),
|
||||
};
|
||||
|
||||
// Convert the PointIds into DocumentIds
|
||||
let mut docids = Vec::new();
|
||||
for pid in pids {
|
||||
let docid =
|
||||
index.vector_id_docid.get(wtxn, &BEU32::new(pid.into_inner()))?.unwrap();
|
||||
docids.push(docid.get());
|
||||
}
|
||||
|
||||
let mut expected_dimensions = points.get(0).map(|p| p.len());
|
||||
let mut cursor = vector_points.into_cursor()?;
|
||||
while let Some((key, value)) = cursor.move_on_next()? {
|
||||
// convert the key back to a u32 (4 bytes)
|
||||
|
@ -318,12 +318,26 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||
return Err(UserError::InvalidVectorDimensions { expected, found })?;
|
||||
}
|
||||
|
||||
let vector = normalize_vector(vector);
|
||||
let vector_id = hnsw.insert(vector, &mut searcher) as u32;
|
||||
index.vector_id_docid.put(wtxn, &BEU32::new(vector_id), &BEU32::new(docid))?;
|
||||
points.push(NDotProductPoint::new(vector));
|
||||
docids.push(docid);
|
||||
}
|
||||
log::debug!("There are {} entries in the HNSW so far", hnsw.len());
|
||||
index.put_vector_hnsw(wtxn, &hnsw)?;
|
||||
|
||||
assert_eq!(docids.len(), points.len());
|
||||
|
||||
let hnsw_length = points.len();
|
||||
let (new_hnsw, pids) = Hnsw::builder().build_hnsw(points);
|
||||
|
||||
index.vector_id_docid.clear(wtxn)?;
|
||||
for (docid, pid) in docids.into_iter().zip(pids) {
|
||||
index.vector_id_docid.put(
|
||||
wtxn,
|
||||
&BEU32::new(pid.into_inner()),
|
||||
&BEU32::new(docid),
|
||||
)?;
|
||||
}
|
||||
|
||||
log::debug!("There are {} entries in the HNSW so far", hnsw_length);
|
||||
index.put_vector_hnsw(wtxn, &new_hnsw)?;
|
||||
}
|
||||
TypedChunk::ScriptLanguageDocids(hash_pair) => {
|
||||
let mut buffer = Vec::new();
|
||||
|
|
|
@ -4,8 +4,9 @@ pub use self::delete_documents::{DeleteDocuments, DeletionStrategy, DocumentDele
|
|||
pub use self::facet::bulk::FacetsUpdateBulk;
|
||||
pub use self::facet::incremental::FacetsUpdateIncrementalInner;
|
||||
pub use self::index_documents::{
|
||||
merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, DocumentAdditionResult, DocumentId,
|
||||
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, MergeFn,
|
||||
merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
|
||||
DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,
|
||||
MergeFn,
|
||||
};
|
||||
pub use self::indexer_config::IndexerConfig;
|
||||
pub use self::prefix_word_pairs::{
|
||||
|
|
|
@ -466,13 +466,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||
let current = self.index.stop_words(self.wtxn)?;
|
||||
|
||||
// Apply an unlossy normalization on stop_words
|
||||
let stop_words = stop_words
|
||||
let stop_words: BTreeSet<String> = stop_words
|
||||
.iter()
|
||||
.map(|w| w.as_str().normalize(&Default::default()).into_owned());
|
||||
.map(|w| w.as_str().normalize(&Default::default()).into_owned())
|
||||
.collect();
|
||||
|
||||
// since we can't compare a BTreeSet with an FST we are going to convert the
|
||||
// BTreeSet to an FST and then compare bytes per bytes the two FSTs.
|
||||
let fst = fst::Set::from_iter(stop_words)?;
|
||||
let fst = fst::Set::from_iter(stop_words.into_iter())?;
|
||||
|
||||
// Does the new FST differ from the previous one?
|
||||
if current
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue