Mirror of https://github.com/meilisearch/MeiliSearch, synced 2025-07-03 11:57:07 +02:00

Small commit to add hybrid search and autoembedding

This commit is contained in:
parent 21bcf32109
commit 13c2c6c16b

42 changed files with 4045 additions and 246 deletions
@@ -686,7 +686,7 @@ impl SearchAggregator {
             ret.max_terms_number = q.split_whitespace().count();
         }

-        if let Some(ref vector) = vector {
+        if let Some(meilisearch_types::milli::VectorQuery::Vector(ref vector)) = vector {
             ret.max_vector_size = vector.len();
         }

@@ -19,7 +19,11 @@ static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
 /// does all the setup before meilisearch is launched
 fn setup(opt: &Opt) -> anyhow::Result<()> {
     let mut log_builder = env_logger::Builder::new();
-    log_builder.parse_filters(&opt.log_level.to_string());
+    let log_filters = format!(
+        "{},h2=warn,hyper=warn,tokio_util=warn,tracing=warn,rustls=warn,mio=warn,reqwest=warn",
+        opt.log_level
+    );
+    log_builder.parse_filters(&log_filters);

     log_builder.init();

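The replacement builds a composite env_logger filter: the configured level still applies to Meilisearch's own logs, while chatty dependency crates (h2, hyper, tokio_util, tracing, rustls, mio, reqwest) are capped at warn. A minimal standalone sketch of the same pattern, with a hard-coded level standing in for `opt.log_level`:

```rust
// Standalone sketch of the filter built above; "debug" stands in for `opt.log_level`.
fn main() {
    let log_level = "debug";
    let log_filters = format!(
        "{},h2=warn,hyper=warn,tokio_util=warn,tracing=warn,rustls=warn,mio=warn,reqwest=warn",
        log_level
    );
    let mut log_builder = env_logger::Builder::new();
    // Application logs follow `log_level`; the listed dependency crates are capped at `warn`.
    log_builder.parse_filters(&log_filters);
    log_builder.init();
    log::debug!("this line is visible, but hyper's debug output is not");
}
```
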
@@ -7,6 +7,7 @@ use meilisearch_types::deserr::DeserrJsonError;
 use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::index_uid::IndexUid;
+use meilisearch_types::milli::VectorQuery;
 use serde_json::Value;

 use crate::analytics::{Analytics, FacetSearchAggregator};

@@ -117,7 +118,7 @@ impl From<FacetSearchQuery> for SearchQuery {
             highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
             crop_marker: DEFAULT_CROP_MARKER(),
             matching_strategy,
-            vector,
+            vector: vector.map(VectorQuery::Vector),
             attributes_to_search_on,
         }
     }

@@ -2,12 +2,13 @@ use actix_web::web::Data;
 use actix_web::{web, HttpRequest, HttpResponse};
 use deserr::actix_web::{AwebJson, AwebQueryParameter};
 use index_scheduler::IndexScheduler;
-use log::debug;
+use log::{debug, warn};
 use meilisearch_types::deserr::query_params::Param;
 use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
 use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::index_uid::IndexUid;
+use meilisearch_types::milli::VectorQuery;
 use meilisearch_types::serde_cs::vec::CS;
 use serde_json::Value;

@@ -88,7 +89,7 @@ impl From<SearchQueryGet> for SearchQuery {

         Self {
             q: other.q,
-            vector: other.vector.map(CS::into_inner),
+            vector: other.vector.map(CS::into_inner).map(VectorQuery::Vector),
             offset: other.offset.0,
             limit: other.limit.0,
             page: other.page.as_deref().copied(),

@@ -193,6 +194,9 @@ pub async fn search_with_post(
     let index = index_scheduler.index(&index_uid)?;

     let features = index_scheduler.features();
+
+    embed(&mut query, index_scheduler.get_ref(), &index).await?;
+
     let search_result =
         tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
     if let Ok(ref search_result) = search_result {

@@ -206,6 +210,38 @@ pub async fn search_with_post(
     Ok(HttpResponse::Ok().json(search_result))
 }

+pub async fn embed(
+    query: &mut SearchQuery,
+    index_scheduler: &IndexScheduler,
+    index: &meilisearch_types::milli::Index,
+) -> Result<(), ResponseError> {
+    if let Some(VectorQuery::String(prompt)) = query.vector.take() {
+        let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
+        let embedder = index_scheduler.embedders(embedder_configs)?;
+
+        /// FIXME: add error if no embedder, remove unwrap, support multiple embedders
+        let embeddings = embedder
+            .get("default")
+            .unwrap()
+            .0
+            .embed(vec![prompt])
+            .await
+            .map_err(meilisearch_types::milli::vector::Error::from)
+            .map_err(meilisearch_types::milli::UserError::from)
+            .map_err(meilisearch_types::milli::Error::from)?
+            .pop()
+            .expect("No vector returned from embedding");
+
+        if embeddings.iter().nth(1).is_some() {
+            warn!("Ignoring embeddings past the first one in long search query");
+            query.vector = Some(VectorQuery::Vector(embeddings.iter().next().unwrap().to_vec()));
+        } else {
+            query.vector = Some(VectorQuery::Vector(embeddings.into_inner()));
+        }
+    };
+    Ok(())
+}
+
 #[cfg(test)]
 mod test {
     use super::*;

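`embed` only does work when the `vector` field arrived as a `VectorQuery::String` prompt: it asks the index's default embedder for an embedding and rewrites the query in place as a `VectorQuery::Vector`; a pre-computed vector passes through untouched. `VectorQuery` itself is defined in milli and is not part of this diff; purely as an illustration, a field that accepts either a string or an array of floats can be modeled as an untagged enum (the sketch below uses serde, whereas Meilisearch's routes use deserr):

```rust
// Illustrative sketch only; the real `VectorQuery` lives in milli and is deserialized via deserr.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum VectorQuery {
    // A textual prompt that the `embed` helper would turn into a vector before searching.
    String(String),
    // A pre-computed embedding that is passed through unchanged.
    Vector(Vec<f32>),
}

fn main() {
    let prompt: VectorQuery = serde_json::from_str(r#""shoes for the beach""#).unwrap();
    let raw: VectorQuery = serde_json::from_str("[0.1, 0.2, 0.3]").unwrap();
    println!("{prompt:?} / {raw:?}");
}
```
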
@@ -13,6 +13,7 @@ use crate::analytics::{Analytics, MultiSearchAggregator};
 use crate::extractors::authentication::policies::ActionPolicy;
 use crate::extractors::authentication::{AuthenticationError, GuardedData};
 use crate::extractors::sequential_extractor::SeqHandler;
+use crate::routes::indexes::search::embed;
 use crate::search::{
     add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
 };

@@ -74,6 +75,8 @@ pub async fn multi_search_with_post(
                 })
                 .with_index(query_index)?;

+            embed(&mut query, index_scheduler.get_ref(), &index).await.with_index(query_index)?;
+
             let search_result =
                 tokio::task::spawn_blocking(move || perform_search(&index, query, features))
                     .await

@@ -16,6 +16,7 @@ use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
 use meilisearch_types::milli::{
     dot_product_similarity, FacetValueHit, InternalError, OrderBy, SearchForFacetValues,
+    VectorQuery,
 };
 use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
 use meilisearch_types::{milli, Document};

@@ -46,7 +47,7 @@ pub struct SearchQuery {
     #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
     pub q: Option<String>,
     #[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
-    pub vector: Option<Vec<f32>>,
+    pub vector: Option<milli::VectorQuery>,
     #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
     pub offset: usize,
     #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]

@@ -105,7 +106,7 @@ pub struct SearchQueryWithIndex {
     #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
     pub q: Option<String>,
     #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
-    pub vector: Option<Vec<f32>>,
+    pub vector: Option<VectorQuery>,
     #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
     pub offset: usize,
     #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]

@@ -339,11 +340,18 @@ fn prepare_search<'t>(
     let mut search = index.search(rtxn);

     if query.vector.is_some() && query.q.is_some() {
-        warn!("Ignoring the query string `q` when used with the `vector` parameter.");
+        warn!("Attempting hybrid search");
     }

     if let Some(ref vector) = query.vector {
-        search.vector(vector.clone());
+        match vector {
+            VectorQuery::Vector(vector) => {
+                search.vector(vector.clone());
+            }
+            VectorQuery::String(_) => {
+                panic!("Failed while preparing search; caller did not generate embedding for query")
+            }
+        }
     }

     if let Some(ref query) = query.q {

@@ -375,7 +383,7 @@ fn prepare_search<'t>(
     }

     if query.vector.is_some() {
-        features.check_vector()?;
+        features.check_vector("Passing `vector` as a query parameter")?;
     }

     // compute the offset on the limit depending on the pagination mode.

@@ -429,7 +437,11 @@ pub fn perform_search(
         prepare_search(index, &rtxn, &query, features)?;

     let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } =
-        search.execute()?;
+        if query.q.is_some() && query.vector.is_some() {
+            search.execute_hybrid()?
+        } else {
+            search.execute()?
+        };

     let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();

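When both `q` and `vector` are set, the new branch calls `search.execute_hybrid()`; how milli merges the keyword and the semantic rankings is not visible in this diff. As a generic illustration only (not Meilisearch's actual strategy), reciprocal rank fusion is one common way to fold two ranked lists of document ids into one:

```rust
// Generic illustration of hybrid merging, not the `execute_hybrid` implementation.
use std::collections::HashMap;

fn reciprocal_rank_fusion(keyword: &[u32], semantic: &[u32], k: f64) -> Vec<u32> {
    let mut scores: HashMap<u32, f64> = HashMap::new();
    for ranking in [keyword, semantic] {
        for (rank, doc_id) in ranking.iter().enumerate() {
            // Documents ranked high in either list accumulate a larger fused score.
            *scores.entry(*doc_id).or_insert(0.0) += 1.0 / (k + rank as f64 + 1.0);
        }
    }
    let mut fused: Vec<(u32, f64)> = scores.into_iter().collect();
    fused.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
    fused.into_iter().map(|(doc_id, _)| doc_id).collect()
}

fn main() {
    let keyword_hits = [1, 2, 3, 4];
    let semantic_hits = [3, 1, 5, 2];
    println!("{:?}", reciprocal_rank_fusion(&keyword_hits, &semantic_hits, 60.0));
}
```
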
@@ -538,13 +550,13 @@ pub fn perform_search(
             insert_geo_distance(sort, &mut document);
         }

-        let semantic_score = match query.vector.as_ref() {
+        let semantic_score = /*match query.vector.as_ref() {
             Some(vector) => match extract_field("_vectors", &fields_ids_map, obkv)? {
                 Some(vectors) => compute_semantic_score(vector, vectors)?,
                 None => None,
             },
             None => None,
-        };
+        };*/ None;

         let ranking_score =
             query.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));

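The per-document semantic score is disabled for now: the old `compute_semantic_score` path is commented out and `None` is stored, even though `dot_product_similarity` is newly imported a few hunks above. As a reminder of what such a score computes, here is a plain dot-product sketch (not milli's implementation), assuming both embeddings have the same dimension and are already normalized so the dot product equals cosine similarity:

```rust
// Sketch only; milli exposes its own `dot_product_similarity` helper.
fn dot_product(query: &[f32], document: &[f32]) -> f32 {
    // Assumes equal dimensions and unit-normalized vectors.
    query.iter().zip(document).map(|(a, b)| a * b).sum()
}

fn main() {
    let query = [0.6, 0.8, 0.0];
    let document = [0.8, 0.6, 0.0];
    println!("semantic score: {}", dot_product(&query, &document)); // 0.96
}
```
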
@@ -629,7 +641,8 @@ pub fn perform_search(
         hits: documents,
         hits_info,
         query: query.q.unwrap_or_default(),
-        vector: query.vector,
+        // FIXME: display input vector
+        vector: None,
         processing_time_ms: before_search.elapsed().as_millis(),
         facet_distribution,
         facet_stats,