mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Merge pull request #5596 from meilisearch/request-fragments
Request fragments
This commit is contained in:
commit
fef089c7b6
86 changed files with 5207 additions and 1513 deletions
|
@ -197,6 +197,7 @@ struct Infos {
|
|||
experimental_max_number_of_batched_tasks: usize,
|
||||
experimental_limit_batched_tasks_total_size: u64,
|
||||
experimental_network: bool,
|
||||
experimental_multimodal: bool,
|
||||
experimental_chat_completions: bool,
|
||||
experimental_get_task_documents_route: bool,
|
||||
experimental_composite_embedders: bool,
|
||||
|
@ -303,6 +304,7 @@ impl Infos {
|
|||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
chat_completions,
|
||||
multimodal,
|
||||
} = features;
|
||||
|
||||
// We're going to override every sensible information.
|
||||
|
@ -322,6 +324,7 @@ impl Infos {
|
|||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_network: network,
|
||||
experimental_chat_completions: chat_completions,
|
||||
experimental_multimodal: multimodal,
|
||||
experimental_get_task_documents_route: get_task_documents_route,
|
||||
experimental_composite_embedders: composite_embedders,
|
||||
experimental_embedding_cache_entries,
|
||||
|
|
|
@ -76,8 +76,10 @@ pub enum MeilisearchHttpError {
|
|||
DocumentFormat(#[from] DocumentFormatError),
|
||||
#[error(transparent)]
|
||||
Join(#[from] JoinError),
|
||||
#[error("Invalid request: missing `hybrid` parameter when `vector` is present.")]
|
||||
#[error("Invalid request: missing `hybrid` parameter when `vector` or `media` are present.")]
|
||||
MissingSearchHybrid,
|
||||
#[error("Invalid request: both `media` and `vector` parameters are present.")]
|
||||
MediaAndVector,
|
||||
}
|
||||
|
||||
impl MeilisearchHttpError {
|
||||
|
@ -111,6 +113,7 @@ impl ErrorCode for MeilisearchHttpError {
|
|||
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
|
||||
MeilisearchHttpError::Join(_) => Code::Internal,
|
||||
MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
|
||||
MeilisearchHttpError::MediaAndVector => Code::InvalidSearchMediaAndVector,
|
||||
MeilisearchHttpError::FederationOptionsInNonFederatedRequest(_) => {
|
||||
Code::InvalidMultiSearchFederationOptions
|
||||
}
|
||||
|
|
|
@ -563,7 +563,7 @@ fn import_dump(
|
|||
let reader = BufReader::new(file);
|
||||
let reader = DocumentsBatchReader::from_reader(reader)?;
|
||||
|
||||
let embedder_configs = index.embedding_configs(&wtxn)?;
|
||||
let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?;
|
||||
let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;
|
||||
|
||||
let builder = milli::update::IndexDocuments::new(
|
||||
|
|
|
@ -54,6 +54,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
|||
get_task_documents_route: Some(false),
|
||||
composite_embedders: Some(false),
|
||||
chat_completions: Some(false),
|
||||
multimodal: Some(false),
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
|
@ -100,6 +101,8 @@ pub struct RuntimeTogglableFeatures {
|
|||
pub composite_embedders: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub chat_completions: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub multimodal: Option<bool>,
|
||||
}
|
||||
|
||||
impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
|
||||
|
@ -113,6 +116,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
|
|||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
chat_completions,
|
||||
multimodal,
|
||||
} = value;
|
||||
|
||||
Self {
|
||||
|
@ -124,6 +128,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
|
|||
get_task_documents_route: Some(get_task_documents_route),
|
||||
composite_embedders: Some(composite_embedders),
|
||||
chat_completions: Some(chat_completions),
|
||||
multimodal: Some(multimodal),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -138,6 +143,7 @@ pub struct PatchExperimentalFeatureAnalytics {
|
|||
get_task_documents_route: bool,
|
||||
composite_embedders: bool,
|
||||
chat_completions: bool,
|
||||
multimodal: bool,
|
||||
}
|
||||
|
||||
impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
|
@ -155,6 +161,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
|
|||
get_task_documents_route: new.get_task_documents_route,
|
||||
composite_embedders: new.composite_embedders,
|
||||
chat_completions: new.chat_completions,
|
||||
multimodal: new.multimodal,
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -181,6 +188,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
|
|||
get_task_documents_route: Some(false),
|
||||
composite_embedders: Some(false),
|
||||
chat_completions: Some(false),
|
||||
multimodal: Some(false),
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
|
@ -223,6 +231,7 @@ async fn patch_features(
|
|||
.composite_embedders
|
||||
.unwrap_or(old_features.composite_embedders),
|
||||
chat_completions: new_features.0.chat_completions.unwrap_or(old_features.chat_completions),
|
||||
multimodal: new_features.0.multimodal.unwrap_or(old_features.multimodal),
|
||||
};
|
||||
|
||||
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
||||
|
@ -237,6 +246,7 @@ async fn patch_features(
|
|||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
chat_completions,
|
||||
multimodal,
|
||||
} = new_features;
|
||||
|
||||
analytics.publish(
|
||||
|
@ -249,6 +259,7 @@ async fn patch_features(
|
|||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
chat_completions,
|
||||
multimodal,
|
||||
},
|
||||
&req,
|
||||
);
|
||||
|
|
|
@ -1452,7 +1452,6 @@ fn some_documents<'a, 't: 'a>(
|
|||
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let embedding_configs = index.embedding_configs(rtxn)?;
|
||||
|
||||
Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
|
||||
ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> {
|
||||
|
@ -1468,15 +1467,9 @@ fn some_documents<'a, 't: 'a>(
|
|||
Some(Value::Object(map)) => map,
|
||||
_ => Default::default(),
|
||||
};
|
||||
for (name, vector) in index.embeddings(rtxn, key)? {
|
||||
let user_provided = embedding_configs
|
||||
.iter()
|
||||
.find(|conf| conf.name == name)
|
||||
.is_some_and(|conf| conf.user_provided.contains(key));
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(vector.into()),
|
||||
regenerate: !user_provided,
|
||||
};
|
||||
for (name, (vector, regenerate)) in index.embeddings(rtxn, key)? {
|
||||
let embeddings =
|
||||
ExplicitVectors { embeddings: Some(vector.into()), regenerate };
|
||||
vectors.insert(
|
||||
name,
|
||||
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
|
||||
|
|
|
@ -56,6 +56,8 @@ pub struct FacetSearchQuery {
|
|||
pub q: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
|
||||
pub vector: Option<Vec<f32>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchMedia>)]
|
||||
pub media: Option<Value>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchHybridQuery>)]
|
||||
pub hybrid: Option<HybridQuery>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
|
||||
|
@ -94,6 +96,7 @@ impl FacetSearchAggregator {
|
|||
facet_name,
|
||||
vector,
|
||||
q,
|
||||
media,
|
||||
filter,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
|
@ -108,6 +111,7 @@ impl FacetSearchAggregator {
|
|||
facet_names: Some(facet_name.clone()).into_iter().collect(),
|
||||
additional_search_parameters_provided: q.is_some()
|
||||
|| vector.is_some()
|
||||
|| media.is_some()
|
||||
|| filter.is_some()
|
||||
|| *matching_strategy != MatchingStrategy::default()
|
||||
|| attributes_to_search_on.is_some()
|
||||
|
@ -291,6 +295,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
|||
facet_name: _,
|
||||
q,
|
||||
vector,
|
||||
media,
|
||||
filter,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
|
@ -312,6 +317,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
|||
|
||||
SearchQuery {
|
||||
q,
|
||||
media,
|
||||
offset: DEFAULT_SEARCH_OFFSET(),
|
||||
limit: DEFAULT_SEARCH_LIMIT(),
|
||||
page,
|
||||
|
|
|
@ -205,6 +205,8 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
|
|||
|
||||
Ok(Self {
|
||||
q: other.q,
|
||||
// `media` not supported for `GET`
|
||||
media: None,
|
||||
vector: other.vector.map(CS::into_inner),
|
||||
offset: other.offset.0,
|
||||
limit: other.limit.0,
|
||||
|
@ -481,28 +483,30 @@ pub fn search_kind(
|
|||
index_uid: String,
|
||||
index: &milli::Index,
|
||||
) -> Result<SearchKind, ResponseError> {
|
||||
let is_placeholder_query =
|
||||
if let Some(q) = query.q.as_deref() { q.trim().is_empty() } else { true };
|
||||
let non_placeholder_query = !is_placeholder_query;
|
||||
let is_media = query.media.is_some();
|
||||
// handle with care, the order of cases matters, the semantics is subtle
|
||||
match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) {
|
||||
// empty query, no vector => placeholder search
|
||||
(Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly),
|
||||
// no query, no vector => placeholder search
|
||||
(None, _, None) => Ok(SearchKind::KeywordOnly),
|
||||
// hybrid.semantic_ratio == 1.0 => vector
|
||||
(_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
|
||||
SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
|
||||
}
|
||||
// hybrid.semantic_ratio == 0.0 => keyword
|
||||
(_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
|
||||
match (is_media, non_placeholder_query, &query.hybrid, query.vector.as_deref()) {
|
||||
// media + vector => error
|
||||
(true, _, _, Some(_)) => Err(MeilisearchHttpError::MediaAndVector.into()),
|
||||
// media + !hybrid => error
|
||||
(true, _, None, _) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
|
||||
// vector + !hybrid => error
|
||||
(_, _, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
|
||||
// hybrid S0 => keyword
|
||||
(_, _, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
|
||||
Ok(SearchKind::KeywordOnly)
|
||||
}
|
||||
// no query, hybrid, vector => semantic
|
||||
(None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
|
||||
SearchKind::semantic(index_scheduler, index_uid, index, embedder, Some(v.len()))
|
||||
// !q + !vector => placeholder search
|
||||
(false, false, _, None) => Ok(SearchKind::KeywordOnly),
|
||||
// hybrid S100 => semantic
|
||||
(_, _, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
|
||||
SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
|
||||
}
|
||||
// query, no hybrid, no vector => keyword
|
||||
(Some(_), None, None) => Ok(SearchKind::KeywordOnly),
|
||||
// query, hybrid, maybe vector => hybrid
|
||||
(Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
|
||||
// q + hybrid => hybrid
|
||||
(_, true, Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
|
||||
index_scheduler,
|
||||
index_uid,
|
||||
index,
|
||||
|
@ -510,7 +514,11 @@ pub fn search_kind(
|
|||
**semantic_ratio,
|
||||
v.map(|v| v.len()),
|
||||
),
|
||||
|
||||
(_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
|
||||
// !q + hybrid => semantic
|
||||
(_, false, Some(HybridQuery { semantic_ratio: _, embedder }), v) => {
|
||||
SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
|
||||
}
|
||||
// q => keyword
|
||||
(false, true, None, None) => Ok(SearchKind::KeywordOnly),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -61,6 +61,8 @@ pub struct SearchAggregator<Method: AggregateMethod> {
|
|||
semantic_ratio: bool,
|
||||
hybrid: bool,
|
||||
retrieve_vectors: bool,
|
||||
// Number of requests containing `media`
|
||||
total_media: usize,
|
||||
|
||||
// every time a search is done, we increment the counter linked to the used settings
|
||||
matching_strategy: HashMap<String, usize>,
|
||||
|
@ -101,6 +103,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
|
|||
let SearchQuery {
|
||||
q,
|
||||
vector,
|
||||
media,
|
||||
offset,
|
||||
limit,
|
||||
page,
|
||||
|
@ -175,6 +178,11 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
|
|||
if let Some(ref vector) = vector {
|
||||
ret.max_vector_size = vector.len();
|
||||
}
|
||||
|
||||
if media.is_some() {
|
||||
ret.total_media = 1;
|
||||
}
|
||||
|
||||
ret.retrieve_vectors |= retrieve_vectors;
|
||||
|
||||
if query.is_finite_pagination() {
|
||||
|
@ -277,6 +285,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
|||
show_ranking_score_details,
|
||||
semantic_ratio,
|
||||
hybrid,
|
||||
total_media,
|
||||
total_degraded,
|
||||
total_used_negative_operator,
|
||||
ranking_score_threshold,
|
||||
|
@ -327,6 +336,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
|||
self.retrieve_vectors |= retrieve_vectors;
|
||||
self.semantic_ratio |= semantic_ratio;
|
||||
self.hybrid |= hybrid;
|
||||
self.total_media += total_media;
|
||||
|
||||
// pagination
|
||||
self.max_limit = self.max_limit.max(max_limit);
|
||||
|
@ -403,6 +413,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
|||
show_ranking_score_details,
|
||||
semantic_ratio,
|
||||
hybrid,
|
||||
total_media,
|
||||
total_degraded,
|
||||
total_used_negative_operator,
|
||||
ranking_score_threshold,
|
||||
|
@ -450,6 +461,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
|||
"hybrid": {
|
||||
"enabled": hybrid,
|
||||
"semantic_ratio": semantic_ratio,
|
||||
"total_media": total_media,
|
||||
},
|
||||
"pagination": {
|
||||
"max_limit": max_limit,
|
||||
|
|
|
@ -755,6 +755,14 @@ fn validate_settings(
|
|||
if matches!(embedder.indexing_embedder, Setting::Set(_)) {
|
||||
features.check_composite_embedders("setting `indexingEmbedder`")?;
|
||||
}
|
||||
|
||||
if matches!(embedder.indexing_fragments, Setting::Set(_)) {
|
||||
features.check_multimodal("setting `indexingFragments`")?;
|
||||
}
|
||||
|
||||
if matches!(embedder.search_fragments, Setting::Set(_)) {
|
||||
features.check_multimodal("setting `searchFragments`")?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -42,6 +42,7 @@ impl MultiSearchAggregator {
|
|||
federation_options,
|
||||
q: _,
|
||||
vector: _,
|
||||
media: _,
|
||||
offset: _,
|
||||
limit: _,
|
||||
page: _,
|
||||
|
|
|
@ -64,6 +64,8 @@ pub struct SearchQuery {
|
|||
pub q: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
|
||||
pub vector: Option<Vec<f32>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchMedia>)]
|
||||
pub media: Option<serde_json::Value>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchHybridQuery>)]
|
||||
pub hybrid: Option<HybridQuery>,
|
||||
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
|
||||
|
@ -147,6 +149,7 @@ impl From<SearchParameters> for SearchQuery {
|
|||
ranking_score_threshold: ranking_score_threshold.map(RankingScoreThreshold::from),
|
||||
q: None,
|
||||
vector: None,
|
||||
media: None,
|
||||
offset: DEFAULT_SEARCH_OFFSET(),
|
||||
page: None,
|
||||
hits_per_page: None,
|
||||
|
@ -220,6 +223,7 @@ impl fmt::Debug for SearchQuery {
|
|||
let Self {
|
||||
q,
|
||||
vector,
|
||||
media,
|
||||
hybrid,
|
||||
offset,
|
||||
limit,
|
||||
|
@ -274,6 +278,9 @@ impl fmt::Debug for SearchQuery {
|
|||
);
|
||||
}
|
||||
}
|
||||
if let Some(media) = media {
|
||||
debug.field("media", media);
|
||||
}
|
||||
if let Some(hybrid) = hybrid {
|
||||
debug.field("hybrid", &hybrid);
|
||||
}
|
||||
|
@ -399,10 +406,10 @@ impl SearchKind {
|
|||
route: Route,
|
||||
) -> Result<(String, Arc<Embedder>, bool), ResponseError> {
|
||||
let rtxn = index.read_txn()?;
|
||||
let embedder_configs = index.embedding_configs(&rtxn)?;
|
||||
let embedder_configs = index.embedding_configs().embedding_configs(&rtxn)?;
|
||||
let embedders = index_scheduler.embedders(index_uid, embedder_configs)?;
|
||||
|
||||
let (embedder, _, quantized) = embedders
|
||||
let (embedder, quantized) = embedders
|
||||
.get(embedder_name)
|
||||
.ok_or(match route {
|
||||
Route::Search | Route::MultiSearch => {
|
||||
|
@ -412,6 +419,7 @@ impl SearchKind {
|
|||
milli::UserError::InvalidSimilarEmbedder(embedder_name.to_owned())
|
||||
}
|
||||
})
|
||||
.map(|runtime| (runtime.embedder.clone(), runtime.is_quantized))
|
||||
.map_err(milli::Error::from)?;
|
||||
|
||||
if let Some(vector_len) = vector_len {
|
||||
|
@ -481,8 +489,10 @@ pub struct SearchQueryWithIndex {
|
|||
pub index_uid: IndexUid,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
|
||||
pub q: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
|
||||
pub vector: Option<Vec<f32>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchMedia>)]
|
||||
pub media: Option<serde_json::Value>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchHybridQuery>)]
|
||||
pub hybrid: Option<HybridQuery>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchOffset>)]
|
||||
|
@ -563,6 +573,7 @@ impl SearchQueryWithIndex {
|
|||
let SearchQuery {
|
||||
q,
|
||||
vector,
|
||||
media,
|
||||
hybrid,
|
||||
offset,
|
||||
limit,
|
||||
|
@ -593,6 +604,7 @@ impl SearchQueryWithIndex {
|
|||
index_uid,
|
||||
q,
|
||||
vector,
|
||||
media,
|
||||
hybrid,
|
||||
offset: if offset == DEFAULT_SEARCH_OFFSET() { None } else { Some(offset) },
|
||||
limit: if limit == DEFAULT_SEARCH_LIMIT() { None } else { Some(limit) },
|
||||
|
@ -627,6 +639,7 @@ impl SearchQueryWithIndex {
|
|||
federation_options,
|
||||
q,
|
||||
vector,
|
||||
media,
|
||||
offset,
|
||||
limit,
|
||||
page,
|
||||
|
@ -657,6 +670,7 @@ impl SearchQueryWithIndex {
|
|||
SearchQuery {
|
||||
q,
|
||||
vector,
|
||||
media,
|
||||
offset: offset.unwrap_or(DEFAULT_SEARCH_OFFSET()),
|
||||
limit: limit.unwrap_or(DEFAULT_SEARCH_LIMIT()),
|
||||
page,
|
||||
|
@ -958,6 +972,9 @@ pub fn prepare_search<'t>(
|
|||
time_budget: TimeBudget,
|
||||
features: RoFeatures,
|
||||
) -> Result<(milli::Search<'t>, bool, usize, usize), ResponseError> {
|
||||
if query.media.is_some() {
|
||||
features.check_multimodal("passing `media` in a search query")?;
|
||||
}
|
||||
let mut search = index.search(rtxn);
|
||||
search.time_budget(time_budget);
|
||||
if let Some(ranking_score_threshold) = query.ranking_score_threshold {
|
||||
|
@ -983,14 +1000,27 @@ pub fn prepare_search<'t>(
|
|||
|
||||
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10);
|
||||
|
||||
let q = query.q.as_deref();
|
||||
let media = query.media.as_ref();
|
||||
|
||||
let search_query = match (q, media) {
|
||||
(Some(text), None) => milli::vector::SearchQuery::Text(text),
|
||||
(q, media) => milli::vector::SearchQuery::Media { q, media },
|
||||
};
|
||||
|
||||
embedder
|
||||
.embed_search(query.q.as_ref().unwrap(), Some(deadline))
|
||||
.embed_search(search_query, Some(deadline))
|
||||
.map_err(milli::vector::Error::from)
|
||||
.map_err(milli::Error::from)?
|
||||
}
|
||||
};
|
||||
|
||||
search.semantic(embedder_name.clone(), embedder.clone(), *quantized, Some(vector));
|
||||
search.semantic(
|
||||
embedder_name.clone(),
|
||||
embedder.clone(),
|
||||
*quantized,
|
||||
Some(vector),
|
||||
query.media.clone(),
|
||||
);
|
||||
}
|
||||
SearchKind::Hybrid { embedder_name, embedder, quantized, semantic_ratio: _ } => {
|
||||
if let Some(q) = &query.q {
|
||||
|
@ -1002,6 +1032,7 @@ pub fn prepare_search<'t>(
|
|||
embedder.clone(),
|
||||
*quantized,
|
||||
query.vector.clone(),
|
||||
query.media.clone(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -1126,6 +1157,7 @@ pub fn perform_search(
|
|||
locales,
|
||||
// already used in prepare_search
|
||||
vector: _,
|
||||
media: _,
|
||||
hybrid: _,
|
||||
offset: _,
|
||||
ranking_score_threshold: _,
|
||||
|
@ -1328,7 +1360,6 @@ struct HitMaker<'a> {
|
|||
vectors_fid: Option<FieldId>,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
to_retrieve_ids: BTreeSet<FieldId>,
|
||||
embedding_configs: Vec<index::IndexEmbeddingConfig>,
|
||||
formatter_builder: MatcherBuilder<'a>,
|
||||
formatted_options: BTreeMap<FieldId, FormatOptions>,
|
||||
show_ranking_score: bool,
|
||||
|
@ -1443,8 +1474,6 @@ impl<'a> HitMaker<'a> {
|
|||
&displayed_ids,
|
||||
);
|
||||
|
||||
let embedding_configs = index.embedding_configs(rtxn)?;
|
||||
|
||||
Ok(Self {
|
||||
index,
|
||||
rtxn,
|
||||
|
@ -1453,7 +1482,6 @@ impl<'a> HitMaker<'a> {
|
|||
vectors_fid,
|
||||
retrieve_vectors,
|
||||
to_retrieve_ids,
|
||||
embedding_configs,
|
||||
formatter_builder,
|
||||
formatted_options,
|
||||
show_ranking_score: format.show_ranking_score,
|
||||
|
@ -1499,14 +1527,8 @@ impl<'a> HitMaker<'a> {
|
|||
Some(Value::Object(map)) => map,
|
||||
_ => Default::default(),
|
||||
};
|
||||
for (name, vector) in self.index.embeddings(self.rtxn, id)? {
|
||||
let user_provided = self
|
||||
.embedding_configs
|
||||
.iter()
|
||||
.find(|conf| conf.name == name)
|
||||
.is_some_and(|conf| conf.user_provided.contains(id));
|
||||
let embeddings =
|
||||
ExplicitVectors { embeddings: Some(vector.into()), regenerate: !user_provided };
|
||||
for (name, (vector, regenerate)) in self.index.embeddings(self.rtxn, id)? {
|
||||
let embeddings = ExplicitVectors { embeddings: Some(vector.into()), regenerate };
|
||||
vectors.insert(
|
||||
name,
|
||||
serde_json::to_value(embeddings).map_err(InternalError::SerdeJson)?,
|
||||
|
|
|
@ -2188,7 +2188,8 @@ async fn import_dump_v6_containing_experimental_features() {
|
|||
"network": false,
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false,
|
||||
"chatCompletions": false
|
||||
"chatCompletions": false,
|
||||
"multimodal": false
|
||||
}
|
||||
"###);
|
||||
|
||||
|
@ -2314,7 +2315,8 @@ async fn import_dump_v6_containing_batches_and_enqueued_tasks() {
|
|||
"network": false,
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false,
|
||||
"chatCompletions": false
|
||||
"chatCompletions": false,
|
||||
"multimodal": false
|
||||
}
|
||||
"###);
|
||||
|
||||
|
@ -2420,7 +2422,8 @@ async fn generate_and_import_dump_containing_vectors() {
|
|||
"network": false,
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false,
|
||||
"chatCompletions": false
|
||||
"chatCompletions": false,
|
||||
"multimodal": false
|
||||
}
|
||||
"###);
|
||||
|
||||
|
|
|
@ -25,7 +25,8 @@ async fn experimental_features() {
|
|||
"network": false,
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false,
|
||||
"chatCompletions": false
|
||||
"chatCompletions": false,
|
||||
"multimodal": false
|
||||
}
|
||||
"###);
|
||||
|
||||
|
@ -41,7 +42,8 @@ async fn experimental_features() {
|
|||
"network": false,
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false,
|
||||
"chatCompletions": false
|
||||
"chatCompletions": false,
|
||||
"multimodal": false
|
||||
}
|
||||
"###);
|
||||
|
||||
|
@ -57,7 +59,8 @@ async fn experimental_features() {
|
|||
"network": false,
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false,
|
||||
"chatCompletions": false
|
||||
"chatCompletions": false,
|
||||
"multimodal": false
|
||||
}
|
||||
"###);
|
||||
|
||||
|
@ -74,7 +77,8 @@ async fn experimental_features() {
|
|||
"network": false,
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false,
|
||||
"chatCompletions": false
|
||||
"chatCompletions": false,
|
||||
"multimodal": false
|
||||
}
|
||||
"###);
|
||||
|
||||
|
@ -91,7 +95,8 @@ async fn experimental_features() {
|
|||
"network": false,
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false,
|
||||
"chatCompletions": false
|
||||
"chatCompletions": false,
|
||||
"multimodal": false
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
@ -115,7 +120,8 @@ async fn experimental_feature_metrics() {
|
|||
"network": false,
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false,
|
||||
"chatCompletions": false
|
||||
"chatCompletions": false,
|
||||
"multimodal": false
|
||||
}
|
||||
"###);
|
||||
|
||||
|
@ -162,7 +168,7 @@ async fn errors() {
|
|||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`, `compositeEmbedders`, `chatCompletions`",
|
||||
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`, `compositeEmbedders`, `chatCompletions`, `multimodal`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
|
|
|
@ -499,7 +499,7 @@ async fn query_combination() {
|
|||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid request: missing `hybrid` parameter when `vector` is present.",
|
||||
"message": "Invalid request: missing `hybrid` parameter when `vector` or `media` are present.",
|
||||
"code": "missing_search_hybrid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#missing_search_hybrid"
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
use std::collections::BTreeMap;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::time::Duration;
|
||||
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use reqwest::IntoUrl;
|
||||
use std::time::Duration;
|
||||
use tokio::sync::mpsc;
|
||||
use wiremock::matchers::{method, path};
|
||||
use wiremock::{Mock, MockServer, Request, ResponseTemplate};
|
||||
|
@ -408,13 +408,13 @@ async fn bad_request() {
|
|||
.await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Error while generating embeddings: user error: in `request`: \"{{text}}\" not found",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
}
|
||||
"###);
|
||||
{
|
||||
"message": "Error while generating embeddings: user error: in `request`: \"{{text}}\" not found\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
}
|
||||
"###);
|
||||
|
||||
// A repeat string appears inside a repeated value
|
||||
let (response, code) = index
|
||||
|
@ -437,7 +437,7 @@ async fn bad_request() {
|
|||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Error while generating embeddings: user error: in `request.input.input`: \"{{..}}\" appears nested inside of a value that is itself repeated",
|
||||
"message": "Error while generating embeddings: user error: in `request.input.input`: \"{{..}}\" appears nested inside of a value that is itself repeated\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
|
@ -460,7 +460,7 @@ async fn bad_request() {
|
|||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Error while generating embeddings: user error: in `request.input.repeat`: \"{{..}}\" appears outside of an array",
|
||||
"message": "Error while generating embeddings: user error: in `request.input.repeat`: \"{{..}}\" appears outside of an array\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
|
@ -483,7 +483,7 @@ async fn bad_request() {
|
|||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Error while generating embeddings: user error: in `request.input`: \"{{..}}\" expected at position #1, but found at position #0",
|
||||
"message": "Error while generating embeddings: user error: in `request.input`: \"{{..}}\" expected at position #1, but found at position #0\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
|
@ -506,7 +506,7 @@ async fn bad_request() {
|
|||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Error while generating embeddings: user error: in `request.input`: \"{{..}}\" expected at position #1, but found at position #2",
|
||||
"message": "Error while generating embeddings: user error: in `request.input`: \"{{..}}\" expected at position #1, but found at position #2\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
|
@ -529,7 +529,7 @@ async fn bad_request() {
|
|||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Error while generating embeddings: user error: in `request.input[0]`: Expected \"{{text}}\" inside of the repeated value",
|
||||
"message": "Error while generating embeddings: user error: in `request.input[0]`: Expected \"{{text}}\" inside of the repeated value\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
|
@ -556,7 +556,7 @@ async fn bad_request() {
|
|||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Error while generating embeddings: user error: in `request.data`: Found \"{{..}}\", but it was already present in `request.input`",
|
||||
"message": "Error while generating embeddings: user error: in `request.data`: Found \"{{..}}\", but it was already present in `request.input`\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
|
@ -577,7 +577,7 @@ async fn bad_request() {
|
|||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Error while generating embeddings: user error: in `request.data`: Found \"{{text}}\", but it was already present in `request.input`",
|
||||
"message": "Error while generating embeddings: user error: in `request.data`: Found \"{{text}}\", but it was already present in `request.input`\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
|
@ -598,7 +598,7 @@ async fn bad_request() {
|
|||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Error while generating embeddings: user error: in `request.repeated.data[1]`: Found \"{{text}}\", but it was already present in `request.repeated.input`",
|
||||
"message": "Error while generating embeddings: user error: in `request.repeated.data[1]`: Found \"{{text}}\", but it was already present in `request.repeated.input`\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
|
@ -619,7 +619,7 @@ async fn bad_request() {
|
|||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Error while generating embeddings: user error: in `request.data`: Found \"{{text}}\", but it was already present in `request.input[0]` (repeated)",
|
||||
"message": "Error while generating embeddings: user error: in `request.data`: Found \"{{text}}\", but it was already present in `request.input[0]` (repeated)\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
|
@ -920,7 +920,7 @@ async fn bad_settings() {
|
|||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Error while generating embeddings: user error: in `request`: \"{{text}}\" not found",
|
||||
"message": "Error while generating embeddings: user error: in `request`: \"{{text}}\" not found\n - Note: this template is using a document template, and so expects to contain the placeholder \"{{text}}\" rather than \"{{fragment}}\"",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue