5187: Bring back v1.12.0 of pre-release changes into `main` r=irevoire a=curquiza



Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Many the fish <many@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-12-23 10:59:33 +00:00 committed by GitHub
commit d3491851bc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
168 changed files with 5778 additions and 2556 deletions

View file

@ -207,7 +207,11 @@ impl<'a> Search<'a> {
Ok(embedding) => embedding,
Err(error) => {
tracing::error!(error=%error, "Embedding failed");
return Ok((keyword_results, Some(0)));
return Ok(return_keyword_results(
self.limit,
self.offset,
keyword_results,
));
}
}
}

View file

@ -274,7 +274,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
last_match_last_token_position_plus_one
} else {
// we have matched the end of possible tokens, there's nothing to advance
tokens.len() - 1
tokens.len()
}
};

View file

@ -49,6 +49,7 @@ pub use self::geo_sort::Strategy as GeoSortStrategy;
use self::graph_based_ranking_rule::Words;
use self::interner::Interned;
use self::vector_sort::VectorSort;
use crate::index::PrefixSearch;
use crate::localized_attributes_rules::LocalizedFieldIds;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
@ -68,6 +69,7 @@ pub struct SearchContext<'ctx> {
pub term_interner: Interner<QueryTerm>,
pub phrase_docids: PhraseDocIdsCache,
pub restricted_fids: Option<RestrictedFids>,
pub prefix_search: PrefixSearch,
}
impl<'ctx> SearchContext<'ctx> {
@ -85,6 +87,8 @@ impl<'ctx> SearchContext<'ctx> {
}
}
let prefix_search = index.prefix_search(txn)?.unwrap_or_default();
Ok(Self {
index,
txn,
@ -94,9 +98,14 @@ impl<'ctx> SearchContext<'ctx> {
term_interner: <_>::default(),
phrase_docids: <_>::default(),
restricted_fids: None,
prefix_search,
})
}
/// Reports whether prefix search may be used for this search context.
///
/// Returns `false` only when `self.prefix_search` is
/// `PrefixSearch::Disabled`; any other value yields `true`.
pub fn is_prefix_search_allowed(&self) -> bool {
    !matches!(self.prefix_search, PrefixSearch::Disabled)
}
pub fn attributes_to_search_on(
&mut self,
attributes_to_search_on: &'ctx [String],

View file

@ -28,6 +28,7 @@ pub fn located_query_terms_from_tokens(
words_limit: Option<usize>,
) -> Result<ExtractedTokens> {
let nbr_typos = number_of_typos_allowed(ctx)?;
let allow_prefix_search = ctx.is_prefix_search_allowed();
let mut query_terms = Vec::new();
@ -94,7 +95,7 @@ pub fn located_query_terms_from_tokens(
ctx,
word,
nbr_typos(word),
true,
allow_prefix_search,
false,
)?;
let located_term = LocatedQueryTerm {

View file

@ -193,15 +193,23 @@ pub fn compute_phrase_docids(
if words.is_empty() {
return Ok(RoaringBitmap::new());
}
let mut candidates = RoaringBitmap::new();
let mut candidates = None;
for word in words.iter().flatten().copied() {
if let Some(word_docids) = ctx.word_docids(None, Word::Original(word))? {
candidates |= word_docids;
if let Some(candidates) = candidates.as_mut() {
*candidates &= word_docids;
} else {
candidates = Some(word_docids);
}
} else {
return Ok(RoaringBitmap::new());
}
}
let Some(mut candidates) = candidates else {
return Ok(RoaringBitmap::new());
};
let winsize = words.len().min(3);
for win in words.windows(winsize) {

View file

@ -5,6 +5,7 @@ use bumpalo::Bump;
use heed::EnvOpenOptions;
use maplit::{btreemap, hashset};
use crate::progress::Progress;
use crate::update::new::indexer;
use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use crate::vector::EmbeddingConfigs;
@ -72,7 +73,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -83,6 +84,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
indexer::index(
&mut wtxn,
&index,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -90,7 +92,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();