mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
WIP multi embedders
fixed template bugs
This commit is contained in:
parent
abbe131084
commit
922a640188
20 changed files with 438 additions and 158 deletions
|
@ -36,7 +36,7 @@ use crate::routes::{create_all_stats, Stats};
|
|||
use crate::search::{
|
||||
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
|
||||
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO,
|
||||
};
|
||||
use crate::Opt;
|
||||
|
||||
|
@ -586,6 +586,11 @@ pub struct SearchAggregator {
|
|||
// vector
|
||||
// The maximum number of floats in a vector request
|
||||
max_vector_size: usize,
|
||||
// Whether the semantic ratio passed to a hybrid search differs from the default ratio.
|
||||
semantic_ratio: bool,
|
||||
// Whether a non-default embedder was specified
|
||||
embedder: bool,
|
||||
hybrid: bool,
|
||||
|
||||
// every time a search is done, we increment the counter linked to the used settings
|
||||
matching_strategy: HashMap<String, usize>,
|
||||
|
@ -639,6 +644,7 @@ impl SearchAggregator {
|
|||
crop_marker,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
} = query;
|
||||
|
||||
let mut ret = Self::default();
|
||||
|
@ -712,6 +718,12 @@ impl SearchAggregator {
|
|||
ret.show_ranking_score = *show_ranking_score;
|
||||
ret.show_ranking_score_details = *show_ranking_score_details;
|
||||
|
||||
if let Some(hybrid) = hybrid {
|
||||
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
|
||||
ret.embedder = hybrid.embedder.is_some();
|
||||
ret.hybrid = true;
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
|
@ -765,6 +777,9 @@ impl SearchAggregator {
|
|||
facets_total_number_of_facets,
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
semantic_ratio,
|
||||
embedder,
|
||||
hybrid,
|
||||
} = other;
|
||||
|
||||
if self.timestamp.is_none() {
|
||||
|
@ -810,6 +825,9 @@ impl SearchAggregator {
|
|||
|
||||
// vector
|
||||
self.max_vector_size = self.max_vector_size.max(max_vector_size);
|
||||
self.semantic_ratio |= semantic_ratio;
|
||||
self.hybrid |= hybrid;
|
||||
self.embedder |= embedder;
|
||||
|
||||
// pagination
|
||||
self.max_limit = self.max_limit.max(max_limit);
|
||||
|
@ -878,6 +896,9 @@ impl SearchAggregator {
|
|||
facets_total_number_of_facets,
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
semantic_ratio,
|
||||
embedder,
|
||||
hybrid,
|
||||
} = self;
|
||||
|
||||
if total_received == 0 {
|
||||
|
@ -917,6 +938,11 @@ impl SearchAggregator {
|
|||
"vector": {
|
||||
"max_vector_size": max_vector_size,
|
||||
},
|
||||
"hybrid": {
|
||||
"enabled": hybrid,
|
||||
"semantic_ratio": semantic_ratio,
|
||||
"embedder": embedder,
|
||||
},
|
||||
"pagination": {
|
||||
"max_limit": max_limit,
|
||||
"max_offset": max_offset,
|
||||
|
@ -1012,6 +1038,7 @@ impl MultiSearchAggregator {
|
|||
crop_marker: _,
|
||||
matching_strategy: _,
|
||||
attributes_to_search_on: _,
|
||||
hybrid: _,
|
||||
} = query;
|
||||
|
||||
index_uid.as_str()
|
||||
|
@ -1158,6 +1185,7 @@ impl FacetSearchAggregator {
|
|||
filter,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
} = query;
|
||||
|
||||
let mut ret = Self::default();
|
||||
|
@ -1171,7 +1199,8 @@ impl FacetSearchAggregator {
|
|||
|| vector.is_some()
|
||||
|| filter.is_some()
|
||||
|| *matching_strategy != MatchingStrategy::default()
|
||||
|| attributes_to_search_on.is_some();
|
||||
|| attributes_to_search_on.is_some()
|
||||
|| hybrid.is_some();
|
||||
|
||||
ret
|
||||
}
|
||||
|
|
|
@ -14,9 +14,9 @@ use crate::analytics::{Analytics, FacetSearchAggregator};
|
|||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
|
||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery,
|
||||
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
|
@ -37,6 +37,8 @@ pub struct FacetSearchQuery {
|
|||
pub q: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
|
||||
pub vector: Option<Vec<f32>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
|
||||
pub hybrid: Option<HybridQuery>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
|
||||
pub filter: Option<Value>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
|
||||
|
@ -96,6 +98,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
|||
filter,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
} = value;
|
||||
|
||||
SearchQuery {
|
||||
|
@ -120,6 +123,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
|||
matching_strategy,
|
||||
vector: vector.map(VectorQuery::Vector),
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@ use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
|||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::VectorQuery;
|
||||
use meilisearch_types::milli::{self, VectorQuery};
|
||||
use meilisearch_types::serde_cs::vec::CS;
|
||||
use serde_json::Value;
|
||||
|
||||
|
@ -17,9 +17,9 @@ use crate::extractors::authentication::policies::*;
|
|||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
|
||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchQuery,
|
||||
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
|
@ -75,6 +75,10 @@ pub struct SearchQueryGet {
|
|||
matching_strategy: MatchingStrategy,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToSearchOn>)]
|
||||
pub attributes_to_search_on: Option<CS<String>>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidHybridQuery>)]
|
||||
pub hybrid_embedder: Option<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidHybridQuery>)]
|
||||
pub hybrid_semantic_ratio: Option<f32>,
|
||||
}
|
||||
|
||||
impl From<SearchQueryGet> for SearchQuery {
|
||||
|
@ -87,6 +91,18 @@ impl From<SearchQueryGet> for SearchQuery {
|
|||
None => None,
|
||||
};
|
||||
|
||||
let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) {
|
||||
(None, None) => None,
|
||||
(None, Some(semantic_ratio)) => Some(HybridQuery { semantic_ratio, embedder: None }),
|
||||
(Some(embedder), None) => Some(HybridQuery {
|
||||
semantic_ratio: DEFAULT_SEMANTIC_RATIO(),
|
||||
embedder: Some(embedder),
|
||||
}),
|
||||
(Some(embedder), Some(semantic_ratio)) => {
|
||||
Some(HybridQuery { semantic_ratio, embedder: Some(embedder) })
|
||||
}
|
||||
};
|
||||
|
||||
Self {
|
||||
q: other.q,
|
||||
vector: other.vector.map(CS::into_inner).map(VectorQuery::Vector),
|
||||
|
@ -109,6 +125,7 @@ impl From<SearchQueryGet> for SearchQuery {
|
|||
crop_marker: other.crop_marker,
|
||||
matching_strategy: other.matching_strategy,
|
||||
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
|
||||
hybrid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -159,6 +176,9 @@ pub async fn search_with_url_query(
|
|||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let features = index_scheduler.features();
|
||||
|
||||
embed(&mut query, index_scheduler.get_ref(), &index).await?;
|
||||
|
||||
let search_result =
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
|
@ -213,22 +233,31 @@ pub async fn search_with_post(
|
|||
pub async fn embed(
|
||||
query: &mut SearchQuery,
|
||||
index_scheduler: &IndexScheduler,
|
||||
index: &meilisearch_types::milli::Index,
|
||||
index: &milli::Index,
|
||||
) -> Result<(), ResponseError> {
|
||||
if let Some(VectorQuery::String(prompt)) = query.vector.take() {
|
||||
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
|
||||
let embedder = index_scheduler.embedders(embedder_configs)?;
|
||||
|
||||
/// FIXME: add error if no embedder, remove unwrap, support multiple embedders
|
||||
let embedder_name = if let Some(HybridQuery {
|
||||
semantic_ratio: _,
|
||||
embedder: Some(embedder),
|
||||
}) = &query.hybrid
|
||||
{
|
||||
embedder
|
||||
} else {
|
||||
"default"
|
||||
};
|
||||
|
||||
let embeddings = embedder
|
||||
.get("default")
|
||||
.unwrap()
|
||||
.get(embedder_name)
|
||||
.ok_or(milli::UserError::InvalidEmbedder(embedder_name.to_owned()))
|
||||
.map_err(milli::Error::from)?
|
||||
.0
|
||||
.embed(vec![prompt])
|
||||
.await
|
||||
.map_err(meilisearch_types::milli::vector::Error::from)
|
||||
.map_err(meilisearch_types::milli::UserError::from)
|
||||
.map_err(meilisearch_types::milli::Error::from)?
|
||||
.map_err(milli::vector::Error::from)
|
||||
.map_err(milli::Error::from)?
|
||||
.pop()
|
||||
.expect("No vector returned from embedding");
|
||||
|
||||
|
|
|
@ -36,6 +36,7 @@ pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
|
|||
pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
|
||||
/// Semantic ratio applied to a hybrid query when the request does not provide one.
///
/// Declared as a function-pointer constant so it can be invoked where a default is needed,
/// e.g. `DEFAULT_SEMANTIC_RATIO()` in a `#[deserr(default = ...)]` attribute.
pub const DEFAULT_SEMANTIC_RATIO: fn() -> f32 = || 0.5;
|
||||
|
||||
#[derive(Debug, Clone, Default, PartialEq, Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
|
@ -44,6 +45,8 @@ pub struct SearchQuery {
|
|||
pub q: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
|
||||
pub vector: Option<milli::VectorQuery>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
|
||||
pub hybrid: Option<HybridQuery>,
|
||||
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
|
||||
pub offset: usize,
|
||||
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
|
||||
|
@ -84,6 +87,15 @@ pub struct SearchQuery {
|
|||
pub attributes_to_search_on: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
/// Hybrid-search options carried by the `hybrid` field of a search request.
///
/// Deserialized through deserr with camelCase field names; unknown fields are rejected.
// NOTE(review): `#[derive(Default)]` produces `semantic_ratio == 0.0` (the `f32` default), while
// the deserr default below is `DEFAULT_SEMANTIC_RATIO()` — confirm the two are meant to differ.
#[derive(Debug, Clone, Default, PartialEq, Deserr)]
|
||||
#[deserr(error = DeserrJsonError<InvalidHybridQuery>, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct HybridQuery {
|
||||
/// Semantic ratio requested for the hybrid search; `DEFAULT_SEMANTIC_RATIO()` when omitted.
// NOTE(review): `default` appears twice in this attribute; presumably the explicit
// `default = DEFAULT_SEMANTIC_RATIO()` is the one that applies — confirm against deserr.
#[deserr(default, error = DeserrJsonError<InvalidSemanticRatio>, default = DEFAULT_SEMANTIC_RATIO())]
|
||||
pub semantic_ratio: f32,
|
||||
/// Name of the embedder to use; `None` when the request does not name one.
// NOTE(review): `default` is listed twice in this attribute — the duplicate looks redundant.
#[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
|
||||
pub embedder: Option<String>,
|
||||
}
|
||||
|
||||
impl SearchQuery {
|
||||
pub fn is_finite_pagination(&self) -> bool {
|
||||
self.page.or(self.hits_per_page).is_some()
|
||||
|
@ -103,6 +115,8 @@ pub struct SearchQueryWithIndex {
|
|||
pub q: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
|
||||
pub vector: Option<VectorQuery>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
|
||||
pub hybrid: Option<HybridQuery>,
|
||||
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
|
||||
pub offset: usize,
|
||||
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
|
||||
|
@ -168,6 +182,7 @@ impl SearchQueryWithIndex {
|
|||
crop_marker,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
} = self;
|
||||
(
|
||||
index_uid,
|
||||
|
@ -193,6 +208,7 @@ impl SearchQueryWithIndex {
|
|||
crop_marker,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
// do not use ..Default::default() here,
|
||||
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
|
||||
},
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue