2024-07-30 15:43:40 +02:00
use std ::collections ::BTreeMap ;
use std ::io ::Write ;
use std ::sync ::atomic ::{ AtomicU32 , Ordering } ;
2024-07-31 15:01:34 +02:00
use std ::sync ::OnceLock ;
2024-07-30 15:43:40 +02:00
use meili_snap ::{ json_string , snapshot } ;
use wiremock ::matchers ::{ method , path } ;
use wiremock ::{ Mock , MockServer , Request , ResponseTemplate } ;
use crate ::common ::{ GetAllDocumentsOptions , Value } ;
use crate ::json ;
use crate ::vector ::get_server_vector ;
/// Recorded embedding responses, keyed by the input text that was embedded.
///
/// Deserialized from the bundled `openai_responses.json.gz` fixture (see `openai_responses`).
#[ derive(serde::Deserialize) ]
struct OpenAiResponses ( BTreeMap < String , OpenAiResponse > ) ;
/// Recorded embeddings of a single text, one optional vector per supported model variant.
///
/// Each field is selected by the matching `ModelDimensions` variant in `OpenAiResponses::get`;
/// a field is `None` when no embedding is present for that variant.
#[ derive(serde::Deserialize) ]
struct OpenAiResponse {
// vector returned for `ModelDimensions::Large`
large : Option < Vec < f32 > > ,
// vector returned for `ModelDimensions::Small`
small : Option < Vec < f32 > > ,
// vector returned for `ModelDimensions::Ada`
ada : Option < Vec < f32 > > ,
// vector returned for `ModelDimensions::Large512`
large_512 : Option < Vec < f32 > > ,
}
2024-07-31 15:01:34 +02:00
/// Recorded response for a request whose `input` is a token array rather than text.
///
/// The mock server answers with `embedding` when the request's `input` array is equal to `tokens`.
#[ derive(serde::Deserialize) ]
struct OpenAiTokenizedResponses {
// token sequence the mock compares the request `input` against
tokens : Vec < u64 > ,
// embedding served when the request `input` matches `tokens`
embedding : Vec < f32 > ,
}
2024-07-30 15:43:40 +02:00
impl OpenAiResponses {
fn get ( & self , text : & str , model_dimensions : ModelDimensions ) -> Option < & [ f32 ] > {
let entry = self . 0. get ( text ) ? ;
match model_dimensions {
ModelDimensions ::Large = > entry . large . as_deref ( ) ,
ModelDimensions ::Small = > entry . small . as_deref ( ) ,
ModelDimensions ::Ada = > entry . ada . as_deref ( ) ,
ModelDimensions ::Large512 = > entry . large_512 . as_deref ( ) ,
}
}
}
/// Model / dimensions combinations that the mock OpenAI server knows how to answer.
///
/// The mapping to OpenAI model names is given by `ModelDimensions::model`.
#[ derive(Debug, Clone, Copy, PartialEq, Eq) ]
enum ModelDimensions {
// `text-embedding-3-large` without a `dimensions` override
Large ,
// `text-embedding-3-small`
Small ,
// `text-embedding-ada-002`
Ada ,
// `text-embedding-3-large` with `dimensions` set to 512
Large512 ,
}
impl ModelDimensions {
    /// Writes the model name into `settings`, plus the 512 dimensions override for `Large512`.
    fn add_to_settings(&self, settings: &mut Value) {
        settings[" model "] = serde_json::json!(self.model());
        if matches!(self, Self::Large512) {
            settings[" dimensions "] = serde_json::json!(512);
        }
    }

    /// Name of the OpenAI model backing this variant.
    fn model(&self) -> &'static str {
        match self {
            Self::Ada => " text-embedding-ada-002 ",
            Self::Small => " text-embedding-3-small ",
            Self::Large | Self::Large512 => " text-embedding-3-large ",
        }
    }

    /// Infers the variant from the body of an embedding request.
    ///
    /// # Panics
    ///
    /// Panics when the request carries a `dimensions` value other than 512, when the model is
    /// not a string, when `dimensions` is combined with a model other than the large one, or
    /// when the model name is not one of the supported ones.
    fn from_request(request: &serde_json::Value) -> Self {
        // the dimensions check comes first so that its panic takes precedence over the model one
        let has_dimensions_512 = match request.get(" dimensions ") {
            Some(dimensions) if dimensions != 512 => panic!(" unsupported dimensions values "),
            Some(_) => true,
            None => false,
        };

        let model = match &request[" model "] {
            serde_json::Value::String(model) => model.as_str(),
            _ => panic!(" unsupported non string model "),
        };

        match (model, has_dimensions_512) {
            (" text-embedding-3-large ", true) => Self::Large512,
            (_, true) => panic!(" unsupported dimensions with non-large model "),
            (" text-embedding-3-large ", false) => Self::Large,
            (" text-embedding-3-small ", false) => Self::Small,
            (" text-embedding-ada-002 ", false) => Self::Ada,
            (_, false) => panic!(" unsupported model "),
        }
    }
}
fn openai_responses ( ) -> & 'static OpenAiResponses {
2024-07-31 15:01:34 +02:00
static OPENAI_RESPONSES : OnceLock < OpenAiResponses > = OnceLock ::new ( ) ;
2024-07-30 15:43:40 +02:00
OPENAI_RESPONSES . get_or_init ( | | {
// json file that was compressed with gzip
// decompress with `gzip --keep -d openai_responses.json.gz`
// recompress with `gzip --keep -c openai_responses.json > openai_responses.json.gz`
let compressed_responses = include_bytes! ( " openai_responses.json.gz " ) ;
let mut responses = Vec ::new ( ) ;
let mut decoder = flate2 ::write ::GzDecoder ::new ( & mut responses ) ;
decoder . write_all ( compressed_responses ) . unwrap ( ) ;
drop ( decoder ) ;
serde_json ::from_slice ( & responses ) . unwrap ( )
} )
}
2024-07-31 15:01:34 +02:00
/// Returns the recorded tokenized response, decompressing and parsing the fixture on first use.
///
/// # Panics
///
/// Panics if the embedded fixture cannot be decompressed or deserialized.
fn openai_tokenized_responses() -> &'static OpenAiTokenizedResponses {
    static OPENAI_TOKENIZED_RESPONSES: OnceLock<OpenAiTokenizedResponses> = OnceLock::new();

    fn load() -> OpenAiTokenizedResponses {
        // json file that was compressed with gzip
        // decompress with `gzip --keep -d openai_tokenized_responses.json.gz`
        // recompress with `gzip --keep -c openai_tokenized_responses.json > openai_tokenized_responses.json.gz`
        let compressed_responses = include_bytes!(" openai_tokenized_responses.json.gz ");
        let mut json_bytes = Vec::new();
        {
            let mut gunzip = flate2::write::GzDecoder::new(&mut json_bytes);
            gunzip.write_all(compressed_responses).unwrap();
            // the decoder is dropped at the end of this scope, before `json_bytes` is read
        }
        serde_json::from_slice(&json_bytes).unwrap()
    }

    OPENAI_TOKENIZED_RESPONSES.get_or_init(load)
}
/// Returns the bundled text fixture repeated three times, built on first use.
///
/// # Panics
///
/// Panics if the embedded fixture cannot be decompressed or is not valid UTF-8.
fn long_text() -> &'static str {
    static LONG_TEXT: OnceLock<String> = OnceLock::new();

    fn load() -> String {
        // decompress with `gzip --keep -d intel_gen.txt.gz`
        // recompress with `gzip --keep -c intel_gen.txt > intel_gen.txt.gz`
        let compressed_long_text = include_bytes!(" intel_gen.txt.gz ");
        let mut raw = Vec::new();
        {
            let mut gunzip = flate2::write::GzDecoder::new(&mut raw);
            gunzip.write_all(compressed_long_text).unwrap();
            // the decoder is dropped at the end of this scope, before `raw` is read
        }
        let single_copy = std::str::from_utf8(&raw).unwrap();
        single_copy.repeat(3)
    }

    LONG_TEXT.get_or_init(load)
}
async fn create_mock_tokenized ( ) -> ( MockServer , Value ) {
2024-11-06 09:25:41 +01:00
create_mock_with_template ( " {{doc.text}} " , ModelDimensions ::Large , false , false ) . await
2024-07-31 15:01:34 +02:00
}
2024-07-30 15:43:40 +02:00
// Starts a mock OpenAI embedding server and builds the embedder settings that point to it.
//
// Parameters:
// - `document_template`: stored as `documentTemplate` in the returned settings.
// - `model_dimensions`: the model variant the mock expects; a request for any other variant
//   makes the responder panic. Also decides the `model` (and `dimensions`) settings.
// - `fallible`: when true, attempts whose counter modulo 4 is 0, 1 or 3 are answered with a 503.
// - `slow`: when true, every request blocks for one second before being handled.
//
// Returns the running `MockServer` (keep it alive for the duration of the test) and the
// embedder settings as a JSON value.
async fn create_mock_with_template (
document_template : & str ,
model_dimensions : ModelDimensions ,
fallible : bool ,
2024-11-06 09:25:41 +01:00
slow : bool ,
2024-07-30 15:43:40 +02:00
) -> ( MockServer , Value ) {
let mock_server = MockServer ::start ( ) . await ;
const API_KEY : & str = " my-api-key " ;
const API_KEY_BEARER : & str = " Bearer my-api-key " ;
// counts the requests received so far; only consulted when `fallible` is set
let attempt = AtomicU32 ::new ( 0 ) ;
Mock ::given ( method ( " POST " ) )
. and ( path ( " / " ) )
. respond_with ( move | req : & Request | {
2024-11-06 09:25:41 +01:00
// 0. when `slow`, block for one second before answering
if slow {
std ::thread ::sleep ( std ::time ::Duration ::from_secs ( 1 ) ) ;
}
// 1. maybe return 503: when `fallible`, attempts 0, 1 and 3 out of every 4 fail
2024-07-30 15:43:40 +02:00
if fallible {
let attempt = attempt . fetch_add ( 1 , Ordering ::Relaxed ) ;
let failed = matches! ( attempt % 4 , 0 | 1 | 3 ) ;
if failed {
return ResponseTemplate ::new ( 503 ) . set_body_json ( json! ( {
" error " : {
" message " : " come back later " ,
" type " : " come_back_later "
}
} ) )
}
}
2024-11-21 16:37:55 +01:00
// 2. check API key: anything other than the expected bearer token gets a 401
2024-07-30 15:43:40 +02:00
match req . headers . get ( " Authorization " ) {
Some ( api_key ) if api_key = = API_KEY_BEARER = > {
{ }
}
Some ( api_key ) = > {
let api_key = api_key . to_str ( ) . unwrap ( ) ;
return ResponseTemplate ::new ( 401 ) . set_body_json (
json! (
{
" error " : {
" message " : format ! ( " Incorrect API key provided: {api_key}. You can find your API key at https://platform.openai.com/account/api-keys. " ) ,
" type " : " invalid_request_error " ,
" param " : serde_json ::Value ::Null ,
" code " : " invalid_api_key "
}
}
) ,
)
}
None = > {
return ResponseTemplate ::new ( 401 ) . set_body_json (
json! (
{
" error " : {
" message " : " You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your browser and are prompted for a username and password. You can obtain an API key from https://platform.openai.com/account/api-keys. " ,
" type " : " invalid_request_error " ,
" param " : serde_json ::Value ::Null ,
" code " : serde_json ::Value ::Null
}
}
) ,
)
}
}
2024-11-06 09:25:41 +01:00
// 3. parse text inputs: a body that is not valid JSON gets a 400
2024-07-30 15:43:40 +02:00
let query : serde_json ::Value = match req . body_json ( ) {
Ok ( query ) = > query ,
Err ( _error ) = > return ResponseTemplate ::new ( 400 ) . set_body_json (
json! (
{
" error " : {
" message " : " We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.) " ,
" type " : " invalid_request_error " ,
" param " : serde_json ::Value ::Null ,
" code " : serde_json ::Value ::Null
}
}
)
)
} ;
// the requested model/dimensions must be the ones this mock was created for
let query_model_dimensions = ModelDimensions ::from_request ( & query ) ;
if query_model_dimensions ! = model_dimensions {
2024-07-31 15:01:34 +02:00
panic! ( " Expected {model_dimensions:?} , got {query_model_dimensions:?} " )
2024-07-30 15:43:40 +02:00
}
2024-11-06 09:25:41 +01:00
// 4. for each text, find embedding in responses
2024-07-30 15:43:40 +02:00
let serde_json ::Value ::Array ( inputs ) = & query [ " input " ] else {
2024-07-31 15:01:34 +02:00
panic! ( " Unexpected `input` value " )
2024-07-30 15:43:40 +02:00
} ;
2024-07-31 15:01:34 +02:00
let openai_tokenized_responses = openai_tokenized_responses ( ) ;
// an `input` equal to the recorded token sequence is answered with the recorded embedding
let embeddings = if inputs = = openai_tokenized_responses . tokens . as_slice ( ) {
vec! [ openai_tokenized_responses . embedding . clone ( ) ]
} else {
let mut embeddings = Vec ::new ( ) ;
for input in inputs {
// otherwise every input must be a string
let serde_json ::Value ::String ( input ) = input else {
return ResponseTemplate ::new ( 400 ) . set_body_json ( json! ( {
" error " : {
" message " : " Unexpected `input` value " ,
" type " : " test_response " ,
" query " : query
}
} ) )
} ;
2024-07-30 15:43:40 +02:00
2024-07-31 15:01:34 +02:00
// the long text is rejected with a context-length error when sent as a string
if input = = long_text ( ) {
return ResponseTemplate ::new ( 400 ) . set_body_json ( json! (
{
" error " : {
" message " : " This model's maximum context length is 8192 tokens, however you requested 10554 tokens (10554 in your prompt; 0 for the completion). Please reduce your prompt; or completion length. " ,
" type " : " invalid_request_error " ,
" param " : null ,
" code " : null ,
}
}
) ) ;
}
2024-07-30 15:43:40 +02:00
2024-07-31 15:01:34 +02:00
// texts without a recorded embedding get a 404 asking to extend the fixture
let Some ( embedding ) = openai_responses ( ) . get ( input , model_dimensions ) else {
return ResponseTemplate ::new ( 404 ) . set_body_json ( json! (
{
" error " : {
" message " : " Could not find embedding for text " ,
" text " : input ,
" model_dimensions " : format ! ( " {model_dimensions:?} " ) ,
" type " : " add_to_openai_responses_json_please " ,
" query " : query ,
}
2024-07-30 15:43:40 +02:00
}
2024-07-31 15:01:34 +02:00
) )
} ;
embeddings . push ( embedding . to_vec ( ) ) ;
}
embeddings
} ;
2024-07-30 15:43:40 +02:00
let data : Vec < _ > = embeddings . into_iter ( ) . enumerate ( ) . map ( | ( index , embedding ) | json! ( {
" object " : " embedding " ,
" index " : index ,
" embedding " : embedding ,
} ) ) . collect ( ) ;
2024-11-06 09:25:41 +01:00
// 5. produce output from embeddings
2024-07-30 15:43:40 +02:00
ResponseTemplate ::new ( 200 ) . set_body_json ( json! ( {
" object " : " list " ,
" data " : data ,
" model " : model_dimensions . model ( ) ,
" usage " : {
" prompt_tokens " : " [prompt_tokens] " ,
" total_tokens " : " [total_tokens] "
}
} ) )
} )
. mount ( & mock_server )
. await ;
// embedder settings pointing at the mock server
let url = mock_server . uri ( ) ;
let mut embedder_settings = json! ( {
" source " : " openAi " ,
" url " : url ,
" apiKey " : API_KEY ,
2024-08-28 09:10:09 +02:00
" documentTemplate " : document_template ,
" documentTemplateMaxBytes " : 8000000 ,
2024-07-30 15:43:40 +02:00
} ) ;
// adds `model`, and `dimensions` for the 512 variant
model_dimensions . add_to_settings ( & mut embedder_settings ) ;
( mock_server , embedder_settings )
}
// Document template shared by the dog-themed mocks below: renders a French sentence built from
// the document's `name`, `birthyear` and `breed`, using the feminine form when `gender` is "F".
const DOGGO_TEMPLATE : & str = r #" {%- if doc.gender == " F " -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}
{ % - else - % }
Un chien nommé { { doc . name } } , né en { { doc . birthyear } }
{ % - endif % } , de race { { doc . breed } } . " #;
async fn create_mock ( ) -> ( MockServer , Value ) {
2024-11-06 09:25:41 +01:00
create_mock_with_template ( DOGGO_TEMPLATE , ModelDimensions ::Large , false , false ) . await
2024-07-30 15:43:40 +02:00
}
async fn create_mock_dimensions ( ) -> ( MockServer , Value ) {
2024-11-06 09:25:41 +01:00
create_mock_with_template ( DOGGO_TEMPLATE , ModelDimensions ::Large512 , false , false ) . await
2024-07-30 15:43:40 +02:00
}
async fn create_mock_small_embedding_model ( ) -> ( MockServer , Value ) {
2024-11-06 09:25:41 +01:00
create_mock_with_template ( DOGGO_TEMPLATE , ModelDimensions ::Small , false , false ) . await
2024-07-30 15:43:40 +02:00
}
async fn create_mock_legacy_embedding_model ( ) -> ( MockServer , Value ) {
2024-11-06 09:25:41 +01:00
create_mock_with_template ( DOGGO_TEMPLATE , ModelDimensions ::Ada , false , false ) . await
2024-07-30 15:43:40 +02:00
}
async fn create_fallible_mock ( ) -> ( MockServer , Value ) {
2024-11-06 09:25:41 +01:00
create_mock_with_template ( DOGGO_TEMPLATE , ModelDimensions ::Large , true , false ) . await
}
async fn create_slow_mock ( ) -> ( MockServer , Value ) {
create_mock_with_template ( DOGGO_TEMPLATE , ModelDimensions ::Large , true , true ) . await
2024-07-30 15:43:40 +02:00
}
// basic test "it works"
#[ actix_rt::test ]
async fn it_works ( ) {
let ( _mock , setting ) = create_mock ( ) . await ;
let server = get_server_vector ( ) . await ;
let index = server . index ( " doggo " ) ;
let ( response , code ) = index
. update_settings ( json! ( {
" embedders " : {
" default " : setting ,
} ,
} ) )
. await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = s erver . wait_task ( response . uid ( ) ) . await ;
snapshot! ( task [ " status " ] , @ r ### ""succeeded""### ) ;
let documents = json! ( [
{ " id " : 0 , " name " : " kefir " , " gender " : " M " , " birthyear " : 2023 , " breed " : " Patou " } ,
{ " id " : 1 , " name " : " Intel " , " gender " : " M " , " birthyear " : 2011 , " breed " : " Beagle " } ,
{ " id " : 2 , " name " : " Vénus " , " gender " : " F " , " birthyear " : 2003 , " breed " : " Jack Russel Terrier " } ,
{ " id " : 3 , " name " : " Max " , " gender " : " M " , " birthyear " : 1995 , " breed " : " Labrador Retriever " } ,
] ) ;
let ( value , code ) = index . add_documents ( documents , None ) . await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = index . wait_task ( value . uid ( ) ) . await ;
snapshot! ( task , @ r ###"
{
2024-07-31 17:57:55 +02:00
" uid " : " [uid] " ,
2024-11-13 11:27:12 +01:00
" batchUid " : " [batch_uid] " ,
2024-07-30 15:43:40 +02:00
" indexUid " : " doggo " ,
" status " : " succeeded " ,
" type " : " documentAdditionOrUpdate " ,
" canceledBy " : null ,
" details " : {
" receivedDocuments " : 4 ,
" indexedDocuments " : 4
} ,
" error " : null ,
" duration " : " [duration] " ,
" enqueuedAt " : " [date] " ,
" startedAt " : " [date] " ,
" finishedAt " : " [date] "
}
" ###);
let ( documents , _code ) = index
. get_all_documents ( GetAllDocumentsOptions { retrieve_vectors : true , .. Default ::default ( ) } )
. await ;
snapshot! ( json_string! ( documents , { " .results.*._vectors.default.embeddings " = > " [vector] " } ) , @ r ###"
{
" results " : [
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
} ,
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
}
] ,
" offset " : 0 ,
" limit " : 20 ,
" total " : 4
}
" ###);
let ( response , code ) = index
. search_post ( json! ( {
" q " : " chien de chasse " ,
2024-09-17 16:30:04 +02:00
" hybrid " : { " semanticRatio " : 1.0 , " embedder " : " default " } ,
2024-07-30 15:43:40 +02:00
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle "
} ,
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou "
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever "
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier "
}
]
" ###);
let ( response , code ) = index
. search_post ( json! ( {
" q " : " petit chien " ,
2024-09-17 16:30:04 +02:00
" hybrid " : { " semanticRatio " : 1.0 , " embedder " : " default " }
2024-07-30 15:43:40 +02:00
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle "
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier "
} ,
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou "
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever "
}
]
" ###);
let ( response , code ) = index
. search_post ( json! ( {
" q " : " grand chien de berger des montagnes " ,
2024-09-17 16:30:04 +02:00
" hybrid " : { " semanticRatio " : 1.0 , " embedder " : " default " }
2024-07-30 15:43:40 +02:00
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou "
} ,
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle "
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever "
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier "
}
]
" ###);
}
// tokenize long text
2024-07-31 15:01:34 +02:00
// The mock rejects `long_text()` with a context-length error when it is sent as a string, and
// only serves an embedding for it when `input` is the recorded token array. The test checks
// that indexing a document made of that text still succeeds and that the document is searchable.
#[ actix_rt::test ]
async fn tokenize_long_text ( ) {
let ( _mock , setting ) = create_mock_tokenized ( ) . await ;
let server = get_server_vector ( ) . await ;
let index = server . index ( " doggo " ) ;
// register the embedder and wait for the settings task to succeed
let ( response , code ) = index
. update_settings ( json! ( {
" embedders " : {
" default " : setting ,
} ,
} ) )
. await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = server . wait_task ( response . uid ( ) ) . await ;
snapshot! ( task [ " status " ] , @ r ### ""succeeded""### ) ;
// a single document whose `text` field is the long text
let documents = json! ( [
{ " id " : 0 , " text " : long_text ( ) }
] ) ;
let ( value , code ) = index . add_documents ( documents , None ) . await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = index . wait_task ( value . uid ( ) ) . await ;
snapshot! ( task , @ r ###"
{
2024-07-31 17:57:55 +02:00
" uid " : " [uid] " ,
2024-11-13 11:27:12 +01:00
" batchUid " : " [batch_uid] " ,
2024-07-31 15:01:34 +02:00
" indexUid " : " doggo " ,
" status " : " succeeded " ,
" type " : " documentAdditionOrUpdate " ,
" canceledBy " : null ,
" details " : {
" receivedDocuments " : 1 ,
" indexedDocuments " : 1
} ,
" error " : null ,
" duration " : " [duration] " ,
" enqueuedAt " : " [date] " ,
" startedAt " : " [date] " ,
" finishedAt " : " [date] "
}
" ###);
// semantic-only search; only the id and the ranking score are snapshotted
let ( response , code ) = index
. search_post ( json! ( {
" q " : " grand chien de berger des montagnes " ,
" showRankingScore " : true ,
" attributesToRetrieve " : [ " id " ] ,
2024-09-17 16:30:04 +02:00
" hybrid " : { " semanticRatio " : 1.0 , " embedder " : " default " }
2024-07-31 15:01:34 +02:00
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 0 ,
" _rankingScore " : 0.07944583892822266
}
]
" ###);
}
2024-07-30 15:43:40 +02:00
// "wrong parameters"
//
// Documents are indexed first, without any embedder. The embedder is then configured with a
// wrong API key, with no API key, and with a non-string API key: the first two are accepted but
// the settings task fails with the mock's authentication error; the last one is rejected
// immediately with a 400.
#[ actix_rt::test ]
async fn bad_api_key ( ) {
let ( _mock , mut setting ) = create_mock ( ) . await ;
let server = get_server_vector ( ) . await ;
let index = server . index ( " doggo " ) ;
let documents = json! ( [
{ " id " : 0 , " name " : " kefir " , " gender " : " M " , " birthyear " : 2023 , " breed " : " Patou " } ,
{ " id " : 1 , " name " : " Intel " , " gender " : " M " , " birthyear " : 2011 , " breed " : " Beagle " } ,
{ " id " : 2 , " name " : " Vénus " , " gender " : " F " , " birthyear " : 2003 , " breed " : " Jack Russel Terrier " } ,
{ " id " : 3 , " name " : " Max " , " gender " : " M " , " birthyear " : 1995 , " breed " : " Labrador Retriever " } ,
] ) ;
let ( value , code ) = index . add_documents ( documents , None ) . await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = index . wait_task ( value . uid ( ) ) . await ;
snapshot! ( task , @ r ###"
{
2024-07-31 17:57:55 +02:00
" uid " : " [uid] " ,
2024-11-13 11:27:12 +01:00
" batchUid " : " [batch_uid] " ,
2024-07-30 15:43:40 +02:00
" indexUid " : " doggo " ,
" status " : " succeeded " ,
" type " : " documentAdditionOrUpdate " ,
" canceledBy " : null ,
" details " : {
" receivedDocuments " : 4 ,
" indexedDocuments " : 4
} ,
" error " : null ,
" duration " : " [duration] " ,
" enqueuedAt " : " [date] " ,
" startedAt " : " [date] " ,
" finishedAt " : " [date] "
}
" ###);
// wrong API key
setting [ " apiKey " ] = " doggo " . into ( ) ;
let ( response , code ) = index
. update_settings ( json! ( {
" embedders " : {
" default " : setting ,
} ,
} ) )
. await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = server . wait_task ( response . uid ( ) ) . await ;
snapshot! ( task , @ r ###"
{
2024-07-31 17:57:55 +02:00
" uid " : " [uid] " ,
2024-11-13 11:27:12 +01:00
" batchUid " : " [batch_uid] " ,
2024-07-30 15:43:40 +02:00
" indexUid " : " doggo " ,
" status " : " failed " ,
" type " : " settingsUpdate " ,
" canceledBy " : null ,
" details " : {
" embedders " : {
" default " : {
" source " : " openAi " ,
" model " : " text-embedding-3-large " ,
" apiKey " : " XXX... " ,
" documentTemplate " : " {%- if doc.gender == \" F \" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}} \n {%- else -%} \n Un chien nommé {{doc.name}}, né en {{doc.birthyear}} \n {%- endif %}, de race {{doc.breed}}. " ,
2024-08-28 09:10:09 +02:00
" documentTemplateMaxBytes " : 8000000 ,
2024-07-30 15:43:40 +02:00
" url " : " [url] "
}
}
} ,
" error " : {
2024-11-22 14:19:20 +08:00
" message " : " Index `doggo`: While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server \n - server replied with `{ \" error \" :{ \" message \" : \" Incorrect API key provided: Bearer doggo. You can find your API key at https://platform.openai.com/account/api-keys. \" , \" type \" : \" invalid_request_error \" , \" param \" :null, \" code \" : \" invalid_api_key \" }}` \n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables " ,
2024-07-30 15:43:40 +02:00
" code " : " vector_embedding_error " ,
" type " : " invalid_request " ,
" link " : " https://docs.meilisearch.com/errors#vector_embedding_error "
} ,
" duration " : " [duration] " ,
" enqueuedAt " : " [date] " ,
" startedAt " : " [date] " ,
" finishedAt " : " [date] "
}
" ###);
// no API key
setting . as_object_mut ( ) . unwrap ( ) . remove ( " apiKey " ) ;
let ( response , code ) = index
. update_settings ( json! ( {
" embedders " : {
" default " : setting ,
} ,
} ) )
. await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = server . wait_task ( response . uid ( ) ) . await ;
snapshot! ( task , @ r ###"
{
2024-07-31 17:57:55 +02:00
" uid " : " [uid] " ,
2024-11-13 11:27:12 +01:00
" batchUid " : " [batch_uid] " ,
2024-07-30 15:43:40 +02:00
" indexUid " : " doggo " ,
" status " : " failed " ,
" type " : " settingsUpdate " ,
" canceledBy " : null ,
" details " : {
" embedders " : {
" default " : {
" source " : " openAi " ,
" model " : " text-embedding-3-large " ,
" documentTemplate " : " {%- if doc.gender == \" F \" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}} \n {%- else -%} \n Un chien nommé {{doc.name}}, né en {{doc.birthyear}} \n {%- endif %}, de race {{doc.breed}}. " ,
2024-08-28 09:10:09 +02:00
" documentTemplateMaxBytes " : 8000000 ,
2024-07-30 15:43:40 +02:00
" url " : " [url] "
}
}
} ,
" error " : {
2024-11-22 14:19:20 +08:00
" message " : " Index `doggo`: While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server \n - server replied with `{ \" error \" :{ \" message \" : \" You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your browser and are prompted for a username and password. You can obtain an API key from https://platform.openai.com/account/api-keys. \" , \" type \" : \" invalid_request_error \" , \" param \" :null, \" code \" :null}}` \n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables " ,
2024-07-30 15:43:40 +02:00
" code " : " vector_embedding_error " ,
" type " : " invalid_request " ,
" link " : " https://docs.meilisearch.com/errors#vector_embedding_error "
} ,
" duration " : " [duration] " ,
" enqueuedAt " : " [date] " ,
" startedAt " : " [date] " ,
" finishedAt " : " [date] "
}
" ###);
// not a string API key: rejected synchronously, no task is awaited
setting [ " apiKey " ] = 42. into ( ) ;
let ( response , code ) = index
. update_settings ( json! ( {
" embedders " : {
" default " : setting ,
} ,
} ) )
. await ;
snapshot! ( code , @ " 400 Bad Request " ) ;
snapshot! ( response , @ r ###"
{
" message " : " Invalid value type at `.embedders.default.apiKey`: expected a string, but found a positive integer: `42` " ,
" code " : " invalid_settings_embedders " ,
" type " : " invalid_request " ,
" link " : " https://docs.meilisearch.com/errors#invalid_settings_embedders "
}
" ###);
}
// one test with wrong model
//
// After indexing documents without an embedder, tries to configure the embedder with an
// unknown model name and then with a non-string model; both are rejected with a 400.
#[ actix_rt::test ]
async fn bad_model ( ) {
let ( _mock , mut setting ) = create_mock ( ) . await ;
let server = get_server_vector ( ) . await ;
let index = server . index ( " doggo " ) ;
let documents = json! ( [
{ " id " : 0 , " name " : " kefir " , " gender " : " M " , " birthyear " : 2023 , " breed " : " Patou " } ,
{ " id " : 1 , " name " : " Intel " , " gender " : " M " , " birthyear " : 2011 , " breed " : " Beagle " } ,
{ " id " : 2 , " name " : " Vénus " , " gender " : " F " , " birthyear " : 2003 , " breed " : " Jack Russel Terrier " } ,
{ " id " : 3 , " name " : " Max " , " gender " : " M " , " birthyear " : 1995 , " breed " : " Labrador Retriever " } ,
] ) ;
let ( value , code ) = index . add_documents ( documents , None ) . await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = index . wait_task ( value . uid ( ) ) . await ;
snapshot! ( task , @ r ###"
{
2024-07-31 17:57:55 +02:00
" uid " : " [uid] " ,
2024-11-13 11:27:12 +01:00
" batchUid " : " [batch_uid] " ,
2024-07-30 15:43:40 +02:00
" indexUid " : " doggo " ,
" status " : " succeeded " ,
" type " : " documentAdditionOrUpdate " ,
" canceledBy " : null ,
" details " : {
" receivedDocuments " : 4 ,
" indexedDocuments " : 4
} ,
" error " : null ,
" duration " : " [duration] " ,
" enqueuedAt " : " [date] " ,
" startedAt " : " [date] " ,
" finishedAt " : " [date] "
}
" ###);
// wrong model
setting [ " model " ] = " doggo " . into ( ) ;
let ( response , code ) = index
. update_settings ( json! ( {
" embedders " : {
" default " : setting ,
} ,
} ) )
. await ;
snapshot! ( code , @ " 400 Bad Request " ) ;
snapshot! ( response , @ r ###"
{
" message " : " `.embedders.default.model`: Invalid model `doggo` for OpenAI. Supported models: [ \" text-embedding-ada-002 \" , \" text-embedding-3-small \" , \" text-embedding-3-large \" ] " ,
" code " : " invalid_settings_embedders " ,
" type " : " invalid_request " ,
" link " : " https://docs.meilisearch.com/errors#invalid_settings_embedders "
}
" ###);
// not a string model
setting [ " model " ] = 42. into ( ) ;
let ( response , code ) = index
. update_settings ( json! ( {
" embedders " : {
" default " : setting ,
} ,
} ) )
. await ;
snapshot! ( code , @ " 400 Bad Request " ) ;
snapshot! ( response , @ r ###"
{
" message " : " Invalid value type at `.embedders.default.model`: expected a string, but found a positive integer: `42` " ,
" code " : " invalid_settings_embedders " ,
" type " : " invalid_request " ,
" link " : " https://docs.meilisearch.com/errors#invalid_settings_embedders "
}
" ###);
}
// After indexing documents without an embedder, tries to configure the embedder with a zero,
// a negative and an oversized `dimensions` value; each is rejected with a 400.
#[ actix_rt::test ]
async fn bad_dimensions ( ) {
let ( _mock , mut setting ) = create_mock ( ) . await ;
let server = get_server_vector ( ) . await ;
let index = server . index ( " doggo " ) ;
let documents = json! ( [
{ " id " : 0 , " name " : " kefir " , " gender " : " M " , " birthyear " : 2023 , " breed " : " Patou " } ,
{ " id " : 1 , " name " : " Intel " , " gender " : " M " , " birthyear " : 2011 , " breed " : " Beagle " } ,
{ " id " : 2 , " name " : " Vénus " , " gender " : " F " , " birthyear " : 2003 , " breed " : " Jack Russel Terrier " } ,
{ " id " : 3 , " name " : " Max " , " gender " : " M " , " birthyear " : 1995 , " breed " : " Labrador Retriever " } ,
] ) ;
let ( value , code ) = index . add_documents ( documents , None ) . await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = index . wait_task ( value . uid ( ) ) . await ;
snapshot! ( task , @ r ###"
{
2024-07-31 17:57:55 +02:00
" uid " : " [uid] " ,
2024-11-13 11:27:12 +01:00
" batchUid " : " [batch_uid] " ,
2024-07-30 15:43:40 +02:00
" indexUid " : " doggo " ,
" status " : " succeeded " ,
" type " : " documentAdditionOrUpdate " ,
" canceledBy " : null ,
" details " : {
" receivedDocuments " : 4 ,
" indexedDocuments " : 4
} ,
" error " : null ,
" duration " : " [duration] " ,
" enqueuedAt " : " [date] " ,
" startedAt " : " [date] " ,
" finishedAt " : " [date] "
}
" ###);
// zero dimensions
setting [ " dimensions " ] = 0. into ( ) ;
let ( response , code ) = index
. update_settings ( json! ( {
" embedders " : {
" default " : setting ,
} ,
} ) )
. await ;
snapshot! ( code , @ " 400 Bad Request " ) ;
snapshot! ( response , @ r ###"
{
" message " : " `.embedders.default.dimensions`: `dimensions` cannot be zero " ,
" code " : " invalid_settings_embedders " ,
" type " : " invalid_request " ,
" link " : " https://docs.meilisearch.com/errors#invalid_settings_embedders "
}
" ###);
// negative dimensions
setting [ " dimensions " ] = ( - 42 ) . into ( ) ;
let ( response , code ) = index
. update_settings ( json! ( {
" embedders " : {
" default " : setting ,
} ,
} ) )
. await ;
snapshot! ( code , @ " 400 Bad Request " ) ;
snapshot! ( response , @ r ###"
{
" message " : " Invalid value type at `.embedders.default.dimensions`: expected a positive integer, but found a negative integer: `-42` " ,
" code " : " invalid_settings_embedders " ,
" type " : " invalid_request " ,
" link " : " https://docs.meilisearch.com/errors#invalid_settings_embedders "
}
" ###);
// huge dimensions
setting [ " dimensions " ] = ( 42_000_000 ) . into ( ) ;
let ( response , code ) = index
. update_settings ( json! ( {
" embedders " : {
" default " : setting ,
} ,
} ) )
. await ;
snapshot! ( code , @ " 400 Bad Request " ) ;
snapshot! ( response , @ r ###"
{
" message " : " `.embedders.default.dimensions`: Model `text-embedding-3-large` does not support overriding its dimensions to a value higher than 3072. Found 42000000 " ,
" code " : " invalid_settings_embedders " ,
" type " : " invalid_request " ,
" link " : " https://docs.meilisearch.com/errors#invalid_settings_embedders "
}
" ###);
}
// one test with changed dimensions
// Configures the `default` embedder of index `doggo` with the settings returned by
// `create_mock_dimensions`, indexes four documents, then snapshots the stored
// `_vectors` entries and the hit order of three hybrid searches.
#[ actix_rt::test ]
async fn smaller_dimensions ( ) {
// NOTE(review): `_mock` is bound for the whole test, presumably to keep the mock
// server alive — confirm against `create_mock_dimensions`.
let ( _mock , setting ) = create_mock_dimensions ( ) . await ;
let server = get_server_vector ( ) . await ;
let index = server . index ( " doggo " ) ;
// Register the mock-backed settings as the `default` embedder and wait for the
// settings task to succeed.
let ( response , code ) = index
. update_settings ( json! ( {
" embedders " : {
" default " : setting ,
} ,
} ) )
. await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = server . wait_task ( response . uid ( ) ) . await ;
snapshot! ( task [ " status " ] , @ r ### ""succeeded""### ) ;
// Index four documents; the task snapshot below expects all four to be indexed
// without error.
let documents = json! ( [
{ " id " : 0 , " name " : " kefir " , " gender " : " M " , " birthyear " : 2023 , " breed " : " Patou " } ,
{ " id " : 1 , " name " : " Intel " , " gender " : " M " , " birthyear " : 2011 , " breed " : " Beagle " } ,
{ " id " : 2 , " name " : " Vénus " , " gender " : " F " , " birthyear " : 2003 , " breed " : " Jack Russel Terrier " } ,
{ " id " : 3 , " name " : " Max " , " gender " : " M " , " birthyear " : 1995 , " breed " : " Labrador Retriever " } ,
] ) ;
let ( value , code ) = index . add_documents ( documents , None ) . await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = index . wait_task ( value . uid ( ) ) . await ;
snapshot! ( task , @ r ###"
{
2024-07-31 17:57:55 +02:00
" uid " : " [uid] " ,
2024-11-13 11:27:12 +01:00
" batchUid " : " [batch_uid] " ,
2024-07-30 15:43:40 +02:00
" indexUid " : " doggo " ,
" status " : " succeeded " ,
" type " : " documentAdditionOrUpdate " ,
" canceledBy " : null ,
" details " : {
" receivedDocuments " : 4 ,
" indexedDocuments " : 4
} ,
" error " : null ,
" duration " : " [duration] " ,
" enqueuedAt " : " [date] " ,
" startedAt " : " [date] " ,
" finishedAt " : " [date] "
}
" ###);
// Fetch the documents with their vectors. The embeddings are redacted to
// `[vector]`, so the snapshot only pins their presence and the `regenerate` flag.
// The status code of this call is deliberately not asserted (`_code`).
let ( documents , _code ) = index
. get_all_documents ( GetAllDocumentsOptions { retrieve_vectors : true , .. Default ::default ( ) } )
. await ;
snapshot! ( json_string! ( documents , { " .results.*._vectors.default.embeddings " = > " [vector] " } ) , @ r ###"
{
" results " : [
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
} ,
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
}
] ,
" offset " : 0 ,
" limit " : 20 ,
" total " : 4
}
" ###);
// First hybrid search with `semanticRatio` 1.0 on the `default` embedder; the
// snapshot pins the order of the returned hits.
let ( response , code ) = index
. search_post ( json! ( {
" q " : " chien de chasse " ,
2024-09-17 16:30:04 +02:00
" hybrid " : { " semanticRatio " : 1.0 , " embedder " : " default " }
2024-07-30 15:43:40 +02:00
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle "
} ,
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou "
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier "
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever "
}
]
" ###);
// Second query, same search parameters; a different hit order is expected.
let ( response , code ) = index
. search_post ( json! ( {
" q " : " petit chien " ,
2024-09-17 16:30:04 +02:00
" hybrid " : { " semanticRatio " : 1.0 , " embedder " : " default " }
2024-07-30 15:43:40 +02:00
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier "
} ,
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou "
} ,
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle "
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever "
}
]
" ###);
// Third query, same search parameters.
let ( response , code ) = index
. search_post ( json! ( {
" q " : " grand chien de berger des montagnes " ,
2024-09-17 16:30:04 +02:00
" hybrid " : { " semanticRatio " : 1.0 , " embedder " : " default " }
2024-07-30 15:43:40 +02:00
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou "
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever "
} ,
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle "
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier "
}
]
" ###);
}
// one test with different models
// Configures the `default` embedder of index `doggo` with the settings returned by
// `create_mock_small_embedding_model`, indexes four documents, then snapshots the
// stored `_vectors` entries and the hit order of three hybrid searches.
#[ actix_rt::test ]
async fn small_embedding_model ( ) {
// NOTE(review): `_mock` is bound for the whole test, presumably to keep the mock
// server alive — confirm against `create_mock_small_embedding_model`.
let ( _mock , setting ) = create_mock_small_embedding_model ( ) . await ;
let server = get_server_vector ( ) . await ;
let index = server . index ( " doggo " ) ;
// Register the mock-backed settings as the `default` embedder and wait for the
// settings task to succeed.
let ( response , code ) = index
. update_settings ( json! ( {
" embedders " : {
" default " : setting ,
} ,
} ) )
. await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = server . wait_task ( response . uid ( ) ) . await ;
snapshot! ( task [ " status " ] , @ r ### ""succeeded""### ) ;
// Index four documents; the task snapshot below expects all four to be indexed
// without error.
let documents = json! ( [
{ " id " : 0 , " name " : " kefir " , " gender " : " M " , " birthyear " : 2023 , " breed " : " Patou " } ,
{ " id " : 1 , " name " : " Intel " , " gender " : " M " , " birthyear " : 2011 , " breed " : " Beagle " } ,
{ " id " : 2 , " name " : " Vénus " , " gender " : " F " , " birthyear " : 2003 , " breed " : " Jack Russel Terrier " } ,
{ " id " : 3 , " name " : " Max " , " gender " : " M " , " birthyear " : 1995 , " breed " : " Labrador Retriever " } ,
] ) ;
let ( value , code ) = index . add_documents ( documents , None ) . await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = index . wait_task ( value . uid ( ) ) . await ;
snapshot! ( task , @ r ###"
{
2024-07-31 17:57:55 +02:00
" uid " : " [uid] " ,
2024-11-13 11:27:12 +01:00
" batchUid " : " [batch_uid] " ,
2024-07-30 15:43:40 +02:00
" indexUid " : " doggo " ,
" status " : " succeeded " ,
" type " : " documentAdditionOrUpdate " ,
" canceledBy " : null ,
" details " : {
" receivedDocuments " : 4 ,
" indexedDocuments " : 4
} ,
" error " : null ,
" duration " : " [duration] " ,
" enqueuedAt " : " [date] " ,
" startedAt " : " [date] " ,
" finishedAt " : " [date] "
}
" ###);
// Fetch the documents with their vectors. The embeddings are redacted to
// `[vector]`, so the snapshot only pins their presence and the `regenerate` flag.
// The status code of this call is deliberately not asserted (`_code`).
let ( documents , _code ) = index
. get_all_documents ( GetAllDocumentsOptions { retrieve_vectors : true , .. Default ::default ( ) } )
. await ;
snapshot! ( json_string! ( documents , { " .results.*._vectors.default.embeddings " = > " [vector] " } ) , @ r ###"
{
" results " : [
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
} ,
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
}
] ,
" offset " : 0 ,
" limit " : 20 ,
" total " : 4
}
" ###);
// First hybrid search with `semanticRatio` 1.0 on the `default` embedder; the
// snapshot pins the order of the returned hits.
let ( response , code ) = index
. search_post ( json! ( {
" q " : " chien de chasse " ,
2024-09-17 16:30:04 +02:00
" hybrid " : { " semanticRatio " : 1.0 , " embedder " : " default " }
2024-07-30 15:43:40 +02:00
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever "
} ,
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle "
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier "
} ,
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou "
}
]
" ###);
// Second query, same search parameters; a different hit order is expected.
let ( response , code ) = index
. search_post ( json! ( {
" q " : " petit chien " ,
2024-09-17 16:30:04 +02:00
" hybrid " : { " semanticRatio " : 1.0 , " embedder " : " default " }
2024-07-30 15:43:40 +02:00
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou "
} ,
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle "
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever "
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier "
}
]
" ###);
// Third query, same search parameters.
let ( response , code ) = index
. search_post ( json! ( {
" q " : " grand chien de berger des montagnes " ,
2024-09-17 16:30:04 +02:00
" hybrid " : { " semanticRatio " : 1.0 , " embedder " : " default " }
2024-07-30 15:43:40 +02:00
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever "
} ,
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou "
} ,
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle "
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier "
}
]
" ###);
}
// Configures the `default` embedder of index `doggo` with the settings returned by
// `create_mock_legacy_embedding_model`, indexes four documents, then snapshots the
// stored `_vectors` entries and the hit order of three hybrid searches.
#[ actix_rt::test ]
async fn legacy_embedding_model ( ) {
// NOTE(review): `_mock` is bound for the whole test, presumably to keep the mock
// server alive — confirm against `create_mock_legacy_embedding_model`.
let ( _mock , setting ) = create_mock_legacy_embedding_model ( ) . await ;
let server = get_server_vector ( ) . await ;
let index = server . index ( " doggo " ) ;
// Register the mock-backed settings as the `default` embedder and wait for the
// settings task to succeed.
let ( response , code ) = index
. update_settings ( json! ( {
" embedders " : {
" default " : setting ,
} ,
} ) )
. await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = server . wait_task ( response . uid ( ) ) . await ;
snapshot! ( task [ " status " ] , @ r ### ""succeeded""### ) ;
// Index four documents; the task snapshot below expects all four to be indexed
// without error.
let documents = json! ( [
{ " id " : 0 , " name " : " kefir " , " gender " : " M " , " birthyear " : 2023 , " breed " : " Patou " } ,
{ " id " : 1 , " name " : " Intel " , " gender " : " M " , " birthyear " : 2011 , " breed " : " Beagle " } ,
{ " id " : 2 , " name " : " Vénus " , " gender " : " F " , " birthyear " : 2003 , " breed " : " Jack Russel Terrier " } ,
{ " id " : 3 , " name " : " Max " , " gender " : " M " , " birthyear " : 1995 , " breed " : " Labrador Retriever " } ,
] ) ;
let ( value , code ) = index . add_documents ( documents , None ) . await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = index . wait_task ( value . uid ( ) ) . await ;
snapshot! ( task , @ r ###"
{
2024-07-31 17:57:55 +02:00
" uid " : " [uid] " ,
2024-11-13 11:27:12 +01:00
" batchUid " : " [batch_uid] " ,
2024-07-30 15:43:40 +02:00
" indexUid " : " doggo " ,
" status " : " succeeded " ,
" type " : " documentAdditionOrUpdate " ,
" canceledBy " : null ,
" details " : {
" receivedDocuments " : 4 ,
" indexedDocuments " : 4
} ,
" error " : null ,
" duration " : " [duration] " ,
" enqueuedAt " : " [date] " ,
" startedAt " : " [date] " ,
" finishedAt " : " [date] "
}
" ###);
// Fetch the documents with their vectors. The embeddings are redacted to
// `[vector]`, so the snapshot only pins their presence and the `regenerate` flag.
// The status code of this call is deliberately not asserted (`_code`).
let ( documents , _code ) = index
. get_all_documents ( GetAllDocumentsOptions { retrieve_vectors : true , .. Default ::default ( ) } )
. await ;
snapshot! ( json_string! ( documents , { " .results.*._vectors.default.embeddings " = > " [vector] " } ) , @ r ###"
{
" results " : [
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
} ,
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
}
] ,
" offset " : 0 ,
" limit " : 20 ,
" total " : 4
}
" ###);
// First hybrid search with `semanticRatio` 1.0 on the `default` embedder; the
// snapshot pins the order of the returned hits.
let ( response , code ) = index
. search_post ( json! ( {
" q " : " chien de chasse " ,
2024-09-17 16:30:04 +02:00
" hybrid " : { " semanticRatio " : 1.0 , " embedder " : " default " }
2024-07-30 15:43:40 +02:00
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier "
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever "
} ,
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle "
} ,
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou "
}
]
" ###);
// Second query, same search parameters; the snapshot expects the same hit order
// as for the first query.
let ( response , code ) = index
. search_post ( json! ( {
" q " : " petit chien " ,
2024-09-17 16:30:04 +02:00
" hybrid " : { " semanticRatio " : 1.0 , " embedder " : " default " }
2024-07-30 15:43:40 +02:00
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier "
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever "
} ,
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle "
} ,
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou "
}
]
" ###);
// Third query, same search parameters.
let ( response , code ) = index
. search_post ( json! ( {
" q " : " grand chien de berger des montagnes " ,
2024-09-17 16:30:04 +02:00
" hybrid " : { " semanticRatio " : 1.0 , " embedder " : " default " }
2024-07-30 15:43:40 +02:00
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever "
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier "
} ,
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle "
} ,
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou "
}
]
" ###);
}
// test with a server that responds 500 on 3 out of 4 calls
// Runs the usual settings / indexing / search scenario against the mock returned by
// `create_fallible_mock`. Every snapshot below expects success: both tasks succeed,
// all four documents get a `_vectors.default` entry, and each search returns 200 OK
// with four hits.
#[ actix_rt::test ]
async fn it_still_works ( ) {
// NOTE(review): `_mock` is bound for the whole test, presumably to keep the mock
// server alive — confirm against `create_fallible_mock`.
let ( _mock , setting ) = create_fallible_mock ( ) . await ;
let server = get_server_vector ( ) . await ;
let index = server . index ( " doggo " ) ;
// Register the mock-backed settings as the `default` embedder and wait for the
// settings task to succeed.
let ( response , code ) = index
. update_settings ( json! ( {
" embedders " : {
" default " : setting ,
} ,
} ) )
. await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = server . wait_task ( response . uid ( ) ) . await ;
snapshot! ( task [ " status " ] , @ r ### ""succeeded""### ) ;
// Index four documents; the task snapshot below expects all four to be indexed
// without error.
let documents = json! ( [
{ " id " : 0 , " name " : " kefir " , " gender " : " M " , " birthyear " : 2023 , " breed " : " Patou " } ,
{ " id " : 1 , " name " : " Intel " , " gender " : " M " , " birthyear " : 2011 , " breed " : " Beagle " } ,
{ " id " : 2 , " name " : " Vénus " , " gender " : " F " , " birthyear " : 2003 , " breed " : " Jack Russel Terrier " } ,
{ " id " : 3 , " name " : " Max " , " gender " : " M " , " birthyear " : 1995 , " breed " : " Labrador Retriever " } ,
] ) ;
let ( value , code ) = index . add_documents ( documents , None ) . await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = index . wait_task ( value . uid ( ) ) . await ;
snapshot! ( task , @ r ###"
{
2024-07-31 17:57:55 +02:00
" uid " : " [uid] " ,
2024-11-13 11:27:12 +01:00
" batchUid " : " [batch_uid] " ,
2024-07-30 15:43:40 +02:00
" indexUid " : " doggo " ,
" status " : " succeeded " ,
" type " : " documentAdditionOrUpdate " ,
" canceledBy " : null ,
" details " : {
" receivedDocuments " : 4 ,
" indexedDocuments " : 4
} ,
" error " : null ,
" duration " : " [duration] " ,
" enqueuedAt " : " [date] " ,
" startedAt " : " [date] " ,
" finishedAt " : " [date] "
}
" ###);
// Fetch the documents with their vectors. The embeddings are redacted to
// `[vector]`, so the snapshot only pins their presence and the `regenerate` flag.
// The status code of this call is deliberately not asserted (`_code`).
let ( documents , _code ) = index
. get_all_documents ( GetAllDocumentsOptions { retrieve_vectors : true , .. Default ::default ( ) } )
. await ;
snapshot! ( json_string! ( documents , { " .results.*._vectors.default.embeddings " = > " [vector] " } ) , @ r ###"
{
" results " : [
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
} ,
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
}
] ,
" offset " : 0 ,
" limit " : 20 ,
" total " : 4
}
" ###);
// First hybrid search with `semanticRatio` 1.0 on the `default` embedder; the
// snapshot pins the order of the returned hits.
let ( response , code ) = index
. search_post ( json! ( {
" q " : " chien de chasse " ,
2024-09-17 16:30:04 +02:00
" hybrid " : { " semanticRatio " : 1.0 , " embedder " : " default " }
2024-07-30 15:43:40 +02:00
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle "
} ,
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou "
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever "
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier "
}
]
" ###);
// Second query, same search parameters; a different hit order is expected.
let ( response , code ) = index
. search_post ( json! ( {
" q " : " petit chien " ,
2024-09-17 16:30:04 +02:00
" hybrid " : { " semanticRatio " : 1.0 , " embedder " : " default " }
2024-07-30 15:43:40 +02:00
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle "
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier "
} ,
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou "
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever "
}
]
" ###);
// Third query, same search parameters.
let ( response , code ) = index
. search_post ( json! ( {
" q " : " grand chien de berger des montagnes " ,
2024-09-17 16:30:04 +02:00
" hybrid " : { " semanticRatio " : 1.0 , " embedder " : " default " }
2024-07-30 15:43:40 +02:00
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou "
} ,
{
" id " : 1 ,
" name " : " Intel " ,
" gender " : " M " ,
" birthyear " : 2011 ,
" breed " : " Beagle "
} ,
{
" id " : 3 ,
" name " : " Max " ,
" gender " : " M " ,
" birthyear " : 1995 ,
" breed " : " Labrador Retriever "
} ,
{
" id " : 2 ,
" name " : " Vénus " ,
" gender " : " F " ,
" birthyear " : 2003 ,
" breed " : " Jack Russel Terrier "
}
]
" ###);
}
2024-11-06 09:25:41 +01:00
// test with a slow server (`create_slow_mock`): the same search is expected to alternate between no semantic hits and one semantic hit
// Runs against the mock returned by `create_slow_mock`: indexes a single document,
// then issues the same hybrid search three times. The snapshots expect the semantic
// hit count to be 0, then 1, then 0, with a 200 OK status every time.
#[ actix_rt::test ]
async fn timeout ( ) {
// NOTE(review): `_mock` is bound for the whole test, presumably to keep the mock
// server alive — confirm against `create_slow_mock`.
let ( _mock , setting ) = create_slow_mock ( ) . await ;
let server = get_server_vector ( ) . await ;
let index = server . index ( " doggo " ) ;
// Register the mock-backed settings as the `default` embedder and wait for the
// settings task to succeed.
let ( response , code ) = index
. update_settings ( json! ( {
" embedders " : {
" default " : setting ,
} ,
} ) )
. await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = server . wait_task ( response . uid ( ) ) . await ;
snapshot! ( task [ " status " ] , @ r ### ""succeeded""### ) ;
// Index a single document; the task snapshot below expects it to be indexed
// without error.
let documents = json! ( [
{ " id " : 0 , " name " : " kefir " , " gender " : " M " , " birthyear " : 2023 , " breed " : " Patou " } ,
] ) ;
let ( value , code ) = index . add_documents ( documents , None ) . await ;
snapshot! ( code , @ " 202 Accepted " ) ;
let task = index . wait_task ( value . uid ( ) ) . await ;
snapshot! ( task , @ r ###"
{
" uid " : " [uid] " ,
2024-11-20 17:08:30 +01:00
" batchUid " : " [batch_uid] " ,
2024-11-06 09:25:41 +01:00
" indexUid " : " doggo " ,
" status " : " succeeded " ,
" type " : " documentAdditionOrUpdate " ,
" canceledBy " : null ,
" details " : {
" receivedDocuments " : 1 ,
" indexedDocuments " : 1
} ,
" error " : null ,
" duration " : " [duration] " ,
" enqueuedAt " : " [date] " ,
" startedAt " : " [date] " ,
" finishedAt " : " [date] "
}
" ###);
// Fetch the document with its vectors. The embeddings are redacted to `[vector]`,
// so the snapshot only pins their presence and the `regenerate` flag.
// The status code of this call is deliberately not asserted (`_code`).
let ( documents , _code ) = index
. get_all_documents ( GetAllDocumentsOptions { retrieve_vectors : true , .. Default ::default ( ) } )
. await ;
snapshot! ( json_string! ( documents , { " .results.*._vectors.default.embeddings " = > " [vector] " } ) , @ r ###"
{
" results " : [
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou " ,
" _vectors " : {
" default " : {
" embeddings " : " [vector] " ,
" regenerate " : true
}
}
}
] ,
" offset " : 0 ,
" limit " : 20 ,
" total " : 1
}
" ###);
// First search (semanticRatio 0.99): the request still returns 200 OK, but the
// snapshot expects zero semantic hits and an empty hit list.
// NOTE(review): presumably the embedding request timed out here — confirm against
// `create_slow_mock`.
let ( response , code ) = index
. search_post ( json! ( {
" q " : " grand chien de berger des montagnes " ,
" hybrid " : { " semanticRatio " : 0.99 , " embedder " : " default " }
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " semanticHitCount " ] ) , @ " 0 " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ " [] " ) ;
// Second, identical search: this time one semantic hit (the indexed document) is
// expected.
let ( response , code ) = index
. search_post ( json! ( {
" q " : " grand chien de berger des montagnes " ,
" hybrid " : { " semanticRatio " : 0.99 , " embedder " : " default " }
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " semanticHitCount " ] ) , @ " 1 " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ r ###"
[
{
" id " : 0 ,
" name " : " kefir " ,
" gender " : " M " ,
" birthyear " : 2023 ,
" breed " : " Patou "
}
]
" ###);
// Third, identical search: the snapshot again expects zero semantic hits and an
// empty hit list.
let ( response , code ) = index
. search_post ( json! ( {
" q " : " grand chien de berger des montagnes " ,
" hybrid " : { " semanticRatio " : 0.99 , " embedder " : " default " }
} ) )
. await ;
snapshot! ( code , @ " 200 OK " ) ;
snapshot! ( json_string! ( response [ " semanticHitCount " ] ) , @ " 0 " ) ;
snapshot! ( json_string! ( response [ " hits " ] ) , @ " [] " ) ;
}
2024-07-30 15:43:40 +02:00
// test with a server that wrongly responds 400