mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-25 22:34:28 +01:00
Merge #4941
4941: Implement the binary quantization in meilisearch r=irevoire a=irevoire # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/4873 ## What does this PR do? - Add a settings for the binary quantization - Once enabled, the bq cannot be disabled TODO: - [ ] Missing a bunch of tests Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
462a2329f1
10
Cargo.lock
generated
10
Cargo.lock
generated
@ -387,14 +387,14 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "arroy"
|
name = "arroy"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/meilisearch/arroy/?rev=2386594dfb009ce08821a925ccc89fb8e30bf73d#2386594dfb009ce08821a925ccc89fb8e30bf73d"
|
||||||
checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytemuck",
|
"bytemuck",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"heed",
|
"heed",
|
||||||
"log",
|
"log",
|
||||||
"memmap2",
|
"memmap2",
|
||||||
|
"nohash",
|
||||||
"ordered-float",
|
"ordered-float",
|
||||||
"rand",
|
"rand",
|
||||||
"rayon",
|
"rayon",
|
||||||
@ -3686,6 +3686,12 @@ version = "0.0.3"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e"
|
checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nohash"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a0f889fb66f7acdf83442c35775764b51fed3c606ab9cee51500dbde2cf528ca"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nom"
|
name = "nom"
|
||||||
version = "7.1.3"
|
version = "7.1.3"
|
||||||
|
@ -255,6 +255,8 @@ pub(crate) mod test {
|
|||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
|
insta::assert_json_snapshot!(vector_index.settings().unwrap());
|
||||||
|
|
||||||
{
|
{
|
||||||
let documents: Result<Vec<_>> = vector_index.documents().unwrap().collect();
|
let documents: Result<Vec<_>> = vector_index.documents().unwrap().collect();
|
||||||
let mut documents = documents.unwrap();
|
let mut documents = documents.unwrap();
|
||||||
|
@ -1,783 +1,56 @@
|
|||||||
---
|
---
|
||||||
source: dump/src/reader/mod.rs
|
source: dump/src/reader/mod.rs
|
||||||
expression: document
|
expression: vector_index.settings().unwrap()
|
||||||
---
|
---
|
||||||
{
|
{
|
||||||
"id": "e3",
|
"displayedAttributes": [
|
||||||
"desc": "overriden vector + map",
|
"*"
|
||||||
"_vectors": {
|
|
||||||
"default": [
|
|
||||||
0.2,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1,
|
|
||||||
0.1
|
|
||||||
],
|
],
|
||||||
"toto": [
|
"searchableAttributes": [
|
||||||
0.1
|
"*"
|
||||||
]
|
],
|
||||||
|
"filterableAttributes": [],
|
||||||
|
"sortableAttributes": [],
|
||||||
|
"rankingRules": [
|
||||||
|
"words",
|
||||||
|
"typo",
|
||||||
|
"proximity",
|
||||||
|
"attribute",
|
||||||
|
"sort",
|
||||||
|
"exactness"
|
||||||
|
],
|
||||||
|
"stopWords": [],
|
||||||
|
"nonSeparatorTokens": [],
|
||||||
|
"separatorTokens": [],
|
||||||
|
"dictionary": [],
|
||||||
|
"synonyms": {},
|
||||||
|
"distinctAttribute": null,
|
||||||
|
"proximityPrecision": "byWord",
|
||||||
|
"typoTolerance": {
|
||||||
|
"enabled": true,
|
||||||
|
"minWordSizeForTypos": {
|
||||||
|
"oneTypo": 5,
|
||||||
|
"twoTypos": 9
|
||||||
|
},
|
||||||
|
"disableOnWords": [],
|
||||||
|
"disableOnAttributes": []
|
||||||
|
},
|
||||||
|
"faceting": {
|
||||||
|
"maxValuesPerFacet": 100,
|
||||||
|
"sortFacetValuesBy": {
|
||||||
|
"*": "alpha"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"pagination": {
|
||||||
|
"maxTotalHits": 1000
|
||||||
|
},
|
||||||
|
"embedders": {
|
||||||
|
"default": {
|
||||||
|
"source": "huggingFace",
|
||||||
|
"model": "BAAI/bge-base-en-v1.5",
|
||||||
|
"revision": "617ca489d9e86b49b8167676d8220688b99db36e",
|
||||||
|
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"searchCutoffMs": null
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,780 @@
|
|||||||
|
---
|
||||||
|
source: dump/src/reader/mod.rs
|
||||||
|
expression: document
|
||||||
|
---
|
||||||
|
{
|
||||||
|
"id": "e0",
|
||||||
|
"desc": "overriden vector",
|
||||||
|
"_vectors": {
|
||||||
|
"default": [
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1,
|
||||||
|
0.1
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
@ -40,7 +40,7 @@ ureq = "2.10.0"
|
|||||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
arroy = "0.4.0"
|
arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" }
|
||||||
big_s = "1.0.2"
|
big_s = "1.0.2"
|
||||||
crossbeam = "0.8.4"
|
crossbeam = "0.8.4"
|
||||||
insta = { version = "1.39.0", features = ["json", "redactions"] }
|
insta = { version = "1.39.0", features = ["json", "redactions"] }
|
||||||
|
@ -1477,7 +1477,7 @@ impl IndexScheduler {
|
|||||||
.map(
|
.map(
|
||||||
|IndexEmbeddingConfig {
|
|IndexEmbeddingConfig {
|
||||||
name,
|
name,
|
||||||
config: milli::vector::EmbeddingConfig { embedder_options, prompt },
|
config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized },
|
||||||
..
|
..
|
||||||
}| {
|
}| {
|
||||||
let prompt =
|
let prompt =
|
||||||
@ -1486,7 +1486,10 @@ impl IndexScheduler {
|
|||||||
{
|
{
|
||||||
let embedders = self.embedders.read().unwrap();
|
let embedders = self.embedders.read().unwrap();
|
||||||
if let Some(embedder) = embedders.get(&embedder_options) {
|
if let Some(embedder) = embedders.get(&embedder_options) {
|
||||||
return Ok((name, (embedder.clone(), prompt)));
|
return Ok((
|
||||||
|
name,
|
||||||
|
(embedder.clone(), prompt, quantized.unwrap_or_default()),
|
||||||
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1500,7 +1503,7 @@ impl IndexScheduler {
|
|||||||
let mut embedders = self.embedders.write().unwrap();
|
let mut embedders = self.embedders.write().unwrap();
|
||||||
embedders.insert(embedder_options, embedder.clone());
|
embedders.insert(embedder_options, embedder.clone());
|
||||||
}
|
}
|
||||||
Ok((name, (embedder, prompt)))
|
Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.collect();
|
.collect();
|
||||||
@ -5197,7 +5200,7 @@ mod tests {
|
|||||||
let simple_hf_name = name.clone();
|
let simple_hf_name = name.clone();
|
||||||
|
|
||||||
let configs = index_scheduler.embedders(configs).unwrap();
|
let configs = index_scheduler.embedders(configs).unwrap();
|
||||||
let (hf_embedder, _) = configs.get(&simple_hf_name).unwrap();
|
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
|
||||||
let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap();
|
let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap();
|
||||||
let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap();
|
let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap();
|
||||||
let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap();
|
let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap();
|
||||||
@ -5519,6 +5522,7 @@ mod tests {
|
|||||||
400,
|
400,
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
quantized: None,
|
||||||
},
|
},
|
||||||
user_provided: RoaringBitmap<[1, 2]>,
|
user_provided: RoaringBitmap<[1, 2]>,
|
||||||
},
|
},
|
||||||
@ -5531,28 +5535,8 @@ mod tests {
|
|||||||
|
|
||||||
// the document with the id 3 should keep its original embedding
|
// the document with the id 3 should keep its original embedding
|
||||||
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
|
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
|
||||||
let mut embeddings = Vec::new();
|
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||||
|
let embeddings = &embeddings["my_doggo_embedder"];
|
||||||
'vectors: for i in 0..=u8::MAX {
|
|
||||||
let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy)
|
|
||||||
.map(Some)
|
|
||||||
.or_else(|e| match e {
|
|
||||||
arroy::Error::MissingMetadata(_) => Ok(None),
|
|
||||||
e => Err(e),
|
|
||||||
})
|
|
||||||
.transpose();
|
|
||||||
|
|
||||||
let Some(reader) = reader else {
|
|
||||||
break 'vectors;
|
|
||||||
};
|
|
||||||
|
|
||||||
let embedding = reader.unwrap().item_vector(&rtxn, docid).unwrap();
|
|
||||||
if let Some(embedding) = embedding {
|
|
||||||
embeddings.push(embedding)
|
|
||||||
} else {
|
|
||||||
break 'vectors;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
snapshot!(embeddings.len(), @"1");
|
snapshot!(embeddings.len(), @"1");
|
||||||
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
|
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
|
||||||
@ -5737,6 +5721,7 @@ mod tests {
|
|||||||
400,
|
400,
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
quantized: None,
|
||||||
},
|
},
|
||||||
user_provided: RoaringBitmap<[0]>,
|
user_provided: RoaringBitmap<[0]>,
|
||||||
},
|
},
|
||||||
@ -5780,6 +5765,7 @@ mod tests {
|
|||||||
400,
|
400,
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
quantized: None,
|
||||||
},
|
},
|
||||||
user_provided: RoaringBitmap<[]>,
|
user_provided: RoaringBitmap<[]>,
|
||||||
},
|
},
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||||
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||||
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued [0,]
|
enqueued [0,]
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued []
|
enqueued []
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued [0,]
|
enqueued [0,]
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued []
|
enqueued []
|
||||||
|
@ -395,7 +395,10 @@ impl ErrorCode for milli::Error {
|
|||||||
| UserError::InvalidSettingsDimensions { .. }
|
| UserError::InvalidSettingsDimensions { .. }
|
||||||
| UserError::InvalidUrl { .. }
|
| UserError::InvalidUrl { .. }
|
||||||
| UserError::InvalidSettingsDocumentTemplateMaxBytes { .. }
|
| UserError::InvalidSettingsDocumentTemplateMaxBytes { .. }
|
||||||
| UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
|
| UserError::InvalidPrompt(_)
|
||||||
|
| UserError::InvalidDisableBinaryQuantization { .. } => {
|
||||||
|
Code::InvalidSettingsEmbedders
|
||||||
|
}
|
||||||
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
|
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
|
||||||
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
|
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
|
||||||
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
|
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
|
||||||
|
@ -643,12 +643,19 @@ fn embedder_analytics(
|
|||||||
.max()
|
.max()
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let binary_quantization_used = setting.as_ref().map(|map| {
|
||||||
|
map.values()
|
||||||
|
.filter_map(|config| config.clone().set())
|
||||||
|
.any(|config| config.binary_quantized.set().is_some())
|
||||||
|
});
|
||||||
|
|
||||||
json!(
|
json!(
|
||||||
{
|
{
|
||||||
"total": setting.as_ref().map(|s| s.len()),
|
"total": setting.as_ref().map(|s| s.len()),
|
||||||
"sources": sources,
|
"sources": sources,
|
||||||
"document_template_used": document_template_used,
|
"document_template_used": document_template_used,
|
||||||
"document_template_max_bytes": document_template_max_bytes
|
"document_template_max_bytes": document_template_max_bytes,
|
||||||
|
"binary_quantization_used": binary_quantization_used,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -102,7 +102,7 @@ async fn similar(
|
|||||||
|
|
||||||
let index = index_scheduler.index(&index_uid)?;
|
let index = index_scheduler.index(&index_uid)?;
|
||||||
|
|
||||||
let (embedder_name, embedder) =
|
let (embedder_name, embedder, quantized) =
|
||||||
SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?;
|
SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?;
|
||||||
|
|
||||||
tokio::task::spawn_blocking(move || {
|
tokio::task::spawn_blocking(move || {
|
||||||
@ -111,6 +111,7 @@ async fn similar(
|
|||||||
query,
|
query,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
embedder,
|
embedder,
|
||||||
|
quantized,
|
||||||
retrieve_vectors,
|
retrieve_vectors,
|
||||||
index_scheduler.features(),
|
index_scheduler.features(),
|
||||||
)
|
)
|
||||||
|
@ -274,8 +274,8 @@ pub struct HybridQuery {
|
|||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub enum SearchKind {
|
pub enum SearchKind {
|
||||||
KeywordOnly,
|
KeywordOnly,
|
||||||
SemanticOnly { embedder_name: String, embedder: Arc<Embedder> },
|
SemanticOnly { embedder_name: String, embedder: Arc<Embedder>, quantized: bool },
|
||||||
Hybrid { embedder_name: String, embedder: Arc<Embedder>, semantic_ratio: f32 },
|
Hybrid { embedder_name: String, embedder: Arc<Embedder>, quantized: bool, semantic_ratio: f32 },
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SearchKind {
|
impl SearchKind {
|
||||||
@ -285,9 +285,9 @@ impl SearchKind {
|
|||||||
embedder_name: &str,
|
embedder_name: &str,
|
||||||
vector_len: Option<usize>,
|
vector_len: Option<usize>,
|
||||||
) -> Result<Self, ResponseError> {
|
) -> Result<Self, ResponseError> {
|
||||||
let (embedder_name, embedder) =
|
let (embedder_name, embedder, quantized) =
|
||||||
Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
|
Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
|
||||||
Ok(Self::SemanticOnly { embedder_name, embedder })
|
Ok(Self::SemanticOnly { embedder_name, embedder, quantized })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn hybrid(
|
pub(crate) fn hybrid(
|
||||||
@ -297,9 +297,9 @@ impl SearchKind {
|
|||||||
semantic_ratio: f32,
|
semantic_ratio: f32,
|
||||||
vector_len: Option<usize>,
|
vector_len: Option<usize>,
|
||||||
) -> Result<Self, ResponseError> {
|
) -> Result<Self, ResponseError> {
|
||||||
let (embedder_name, embedder) =
|
let (embedder_name, embedder, quantized) =
|
||||||
Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
|
Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
|
||||||
Ok(Self::Hybrid { embedder_name, embedder, semantic_ratio })
|
Ok(Self::Hybrid { embedder_name, embedder, quantized, semantic_ratio })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn embedder(
|
pub(crate) fn embedder(
|
||||||
@ -307,16 +307,14 @@ impl SearchKind {
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
embedder_name: &str,
|
embedder_name: &str,
|
||||||
vector_len: Option<usize>,
|
vector_len: Option<usize>,
|
||||||
) -> Result<(String, Arc<Embedder>), ResponseError> {
|
) -> Result<(String, Arc<Embedder>, bool), ResponseError> {
|
||||||
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
|
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
|
||||||
let embedders = index_scheduler.embedders(embedder_configs)?;
|
let embedders = index_scheduler.embedders(embedder_configs)?;
|
||||||
|
|
||||||
let embedder = embedders.get(embedder_name);
|
let (embedder, _, quantized) = embedders
|
||||||
|
.get(embedder_name)
|
||||||
let embedder = embedder
|
|
||||||
.ok_or(milli::UserError::InvalidEmbedder(embedder_name.to_owned()))
|
.ok_or(milli::UserError::InvalidEmbedder(embedder_name.to_owned()))
|
||||||
.map_err(milli::Error::from)?
|
.map_err(milli::Error::from)?;
|
||||||
.0;
|
|
||||||
|
|
||||||
if let Some(vector_len) = vector_len {
|
if let Some(vector_len) = vector_len {
|
||||||
if vector_len != embedder.dimensions() {
|
if vector_len != embedder.dimensions() {
|
||||||
@ -330,7 +328,7 @@ impl SearchKind {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((embedder_name.to_owned(), embedder))
|
Ok((embedder_name.to_owned(), embedder, quantized))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -791,7 +789,7 @@ fn prepare_search<'t>(
|
|||||||
search.query(q);
|
search.query(q);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
SearchKind::SemanticOnly { embedder_name, embedder } => {
|
SearchKind::SemanticOnly { embedder_name, embedder, quantized } => {
|
||||||
let vector = match query.vector.clone() {
|
let vector = match query.vector.clone() {
|
||||||
Some(vector) => vector,
|
Some(vector) => vector,
|
||||||
None => {
|
None => {
|
||||||
@ -805,14 +803,19 @@ fn prepare_search<'t>(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
search.semantic(embedder_name.clone(), embedder.clone(), Some(vector));
|
search.semantic(embedder_name.clone(), embedder.clone(), *quantized, Some(vector));
|
||||||
}
|
}
|
||||||
SearchKind::Hybrid { embedder_name, embedder, semantic_ratio: _ } => {
|
SearchKind::Hybrid { embedder_name, embedder, quantized, semantic_ratio: _ } => {
|
||||||
if let Some(q) = &query.q {
|
if let Some(q) = &query.q {
|
||||||
search.query(q);
|
search.query(q);
|
||||||
}
|
}
|
||||||
// will be embedded in hybrid search if necessary
|
// will be embedded in hybrid search if necessary
|
||||||
search.semantic(embedder_name.clone(), embedder.clone(), query.vector.clone());
|
search.semantic(
|
||||||
|
embedder_name.clone(),
|
||||||
|
embedder.clone(),
|
||||||
|
*quantized,
|
||||||
|
query.vector.clone(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1441,6 +1444,7 @@ pub fn perform_similar(
|
|||||||
query: SimilarQuery,
|
query: SimilarQuery,
|
||||||
embedder_name: String,
|
embedder_name: String,
|
||||||
embedder: Arc<Embedder>,
|
embedder: Arc<Embedder>,
|
||||||
|
quantized: bool,
|
||||||
retrieve_vectors: RetrieveVectors,
|
retrieve_vectors: RetrieveVectors,
|
||||||
features: RoFeatures,
|
features: RoFeatures,
|
||||||
) -> Result<SimilarResult, ResponseError> {
|
) -> Result<SimilarResult, ResponseError> {
|
||||||
@ -1469,8 +1473,16 @@ pub fn perform_similar(
|
|||||||
));
|
));
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut similar =
|
let mut similar = milli::Similar::new(
|
||||||
milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder);
|
internal_id,
|
||||||
|
offset,
|
||||||
|
limit,
|
||||||
|
index,
|
||||||
|
&rtxn,
|
||||||
|
embedder_name,
|
||||||
|
embedder,
|
||||||
|
quantized,
|
||||||
|
);
|
||||||
|
|
||||||
if let Some(ref filter) = query.filter {
|
if let Some(ref filter) = query.filter {
|
||||||
if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? {
|
if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? {
|
||||||
|
380
meilisearch/tests/vector/binary_quantized.rs
Normal file
380
meilisearch/tests/vector/binary_quantized.rs
Normal file
@ -0,0 +1,380 @@
|
|||||||
|
use meili_snap::{json_string, snapshot};
|
||||||
|
|
||||||
|
use crate::common::{GetAllDocumentsOptions, Server};
|
||||||
|
use crate::json;
|
||||||
|
use crate::vector::generate_default_user_provided_documents;
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn retrieve_binary_quantize_status_in_the_settings() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("doggo");
|
||||||
|
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(value, @r###"
|
||||||
|
{
|
||||||
|
"vectorStore": true,
|
||||||
|
"metrics": false,
|
||||||
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false,
|
||||||
|
"containsFilter": false
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
|
let (settings, code) = index.settings().await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3}"###);
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
"binaryQuantized": false,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
|
let (settings, code) = index.settings().await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":false}"###);
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
"binaryQuantized": true,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
|
let (settings, code) = index.settings().await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":true}"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn binary_quantize_before_sending_documents() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("doggo");
|
||||||
|
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(value, @r###"
|
||||||
|
{
|
||||||
|
"vectorStore": true,
|
||||||
|
"metrics": false,
|
||||||
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false,
|
||||||
|
"containsFilter": false
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
"binaryQuantized": true,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
|
let documents = json!([
|
||||||
|
{"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }},
|
||||||
|
{"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }},
|
||||||
|
]);
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await.succeeded();
|
||||||
|
|
||||||
|
// Make sure the documents are binary quantized
|
||||||
|
let (documents, _code) = index
|
||||||
|
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||||
|
.await;
|
||||||
|
snapshot!(json_string!(documents), @r###"
|
||||||
|
{
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"name": "kefir",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": {
|
||||||
|
"embeddings": [
|
||||||
|
[
|
||||||
|
-1.0,
|
||||||
|
-1.0,
|
||||||
|
1.0
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"regenerate": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"name": "echo",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": {
|
||||||
|
"embeddings": [
|
||||||
|
[
|
||||||
|
1.0,
|
||||||
|
1.0,
|
||||||
|
-1.0
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"regenerate": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"offset": 0,
|
||||||
|
"limit": 20,
|
||||||
|
"total": 2
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn binary_quantize_after_sending_documents() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("doggo");
|
||||||
|
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(value, @r###"
|
||||||
|
{
|
||||||
|
"vectorStore": true,
|
||||||
|
"metrics": false,
|
||||||
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false,
|
||||||
|
"containsFilter": false
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
|
let documents = json!([
|
||||||
|
{"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }},
|
||||||
|
{"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }},
|
||||||
|
]);
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await.succeeded();
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
"binaryQuantized": true,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
|
// Make sure the documents are binary quantized
|
||||||
|
let (documents, _code) = index
|
||||||
|
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||||
|
.await;
|
||||||
|
snapshot!(json_string!(documents), @r###"
|
||||||
|
{
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"name": "kefir",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": {
|
||||||
|
"embeddings": [
|
||||||
|
[
|
||||||
|
-1.0,
|
||||||
|
-1.0,
|
||||||
|
1.0
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"regenerate": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"name": "echo",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": {
|
||||||
|
"embeddings": [
|
||||||
|
[
|
||||||
|
1.0,
|
||||||
|
1.0,
|
||||||
|
-1.0
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"regenerate": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"offset": 0,
|
||||||
|
"limit": 20,
|
||||||
|
"total": 2
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn try_to_disable_binary_quantization() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("doggo");
|
||||||
|
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(value, @r###"
|
||||||
|
{
|
||||||
|
"vectorStore": true,
|
||||||
|
"metrics": false,
|
||||||
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false,
|
||||||
|
"containsFilter": false
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
"binaryQuantized": true,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
"binaryQuantized": false,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
let ret = server.wait_task(response.uid()).await;
|
||||||
|
snapshot!(ret, @r###"
|
||||||
|
{
|
||||||
|
"uid": "[uid]",
|
||||||
|
"indexUid": "doggo",
|
||||||
|
"status": "failed",
|
||||||
|
"type": "settingsUpdate",
|
||||||
|
"canceledBy": null,
|
||||||
|
"details": {
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
"binaryQuantized": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.",
|
||||||
|
"code": "invalid_settings_embedders",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
|
||||||
|
},
|
||||||
|
"duration": "[duration]",
|
||||||
|
"enqueuedAt": "[date]",
|
||||||
|
"startedAt": "[date]",
|
||||||
|
"finishedAt": "[date]"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn binary_quantize_clear_documents() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = generate_default_user_provided_documents(&server).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"binaryQuantized": true,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
|
let (value, _code) = index.clear_all_documents().await;
|
||||||
|
index.wait_task(value.uid()).await.succeeded();
|
||||||
|
|
||||||
|
// Make sure the documents DB has been cleared
|
||||||
|
let (documents, _code) = index
|
||||||
|
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||||
|
.await;
|
||||||
|
snapshot!(json_string!(documents), @r###"
|
||||||
|
{
|
||||||
|
"results": [],
|
||||||
|
"offset": 0,
|
||||||
|
"limit": 20,
|
||||||
|
"total": 0
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// Make sure the arroy DB has been cleared
|
||||||
|
let (documents, _code) =
|
||||||
|
index.search_post(json!({ "hybrid": { "embedder": "manual" }, "vector": [1, 1, 1] })).await;
|
||||||
|
snapshot!(documents, @r###"
|
||||||
|
{
|
||||||
|
"hits": [],
|
||||||
|
"query": "",
|
||||||
|
"processingTimeMs": "[duration]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 0,
|
||||||
|
"semanticHitCount": 0
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
@ -1,3 +1,4 @@
|
|||||||
|
mod binary_quantized;
|
||||||
mod openai;
|
mod openai;
|
||||||
mod rest;
|
mod rest;
|
||||||
mod settings;
|
mod settings;
|
||||||
|
@ -80,7 +80,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
|
|||||||
tiktoken-rs = "0.5.9"
|
tiktoken-rs = "0.5.9"
|
||||||
liquid = "0.26.6"
|
liquid = "0.26.6"
|
||||||
rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] }
|
rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] }
|
||||||
arroy = "0.4.0"
|
arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" }
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
tracing = "0.1.40"
|
tracing = "0.1.40"
|
||||||
ureq = { version = "2.10.0", features = ["json"] }
|
ureq = { version = "2.10.0", features = ["json"] }
|
||||||
|
@ -258,6 +258,10 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
|||||||
},
|
},
|
||||||
#[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")]
|
#[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")]
|
||||||
InvalidSettingsDimensions { embedder_name: String },
|
InvalidSettingsDimensions { embedder_name: String },
|
||||||
|
#[error(
|
||||||
|
"`.embedders.{embedder_name}.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors."
|
||||||
|
)]
|
||||||
|
InvalidDisableBinaryQuantization { embedder_name: String },
|
||||||
#[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")]
|
#[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")]
|
||||||
InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String },
|
InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String },
|
||||||
#[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")]
|
#[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")]
|
||||||
|
@ -21,7 +21,7 @@ use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
|
|||||||
use crate::order_by_map::OrderByMap;
|
use crate::order_by_map::OrderByMap;
|
||||||
use crate::proximity::ProximityPrecision;
|
use crate::proximity::ProximityPrecision;
|
||||||
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
|
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
|
||||||
use crate::vector::{Embedding, EmbeddingConfig};
|
use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig};
|
||||||
use crate::{
|
use crate::{
|
||||||
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
||||||
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
|
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
|
||||||
@ -162,7 +162,7 @@ pub struct Index {
|
|||||||
/// Maps an embedder name to its id in the arroy store.
|
/// Maps an embedder name to its id in the arroy store.
|
||||||
pub embedder_category_id: Database<Str, U8>,
|
pub embedder_category_id: Database<Str, U8>,
|
||||||
/// Vector store based on arroy™.
|
/// Vector store based on arroy™.
|
||||||
pub vector_arroy: arroy::Database<arroy::distances::Angular>,
|
pub vector_arroy: arroy::Database<Unspecified>,
|
||||||
|
|
||||||
/// Maps the document id to the document as an obkv store.
|
/// Maps the document id to the document as an obkv store.
|
||||||
pub(crate) documents: Database<BEU32, ObkvCodec>,
|
pub(crate) documents: Database<BEU32, ObkvCodec>,
|
||||||
@ -1614,15 +1614,17 @@ impl Index {
|
|||||||
&'a self,
|
&'a self,
|
||||||
rtxn: &'a RoTxn<'a>,
|
rtxn: &'a RoTxn<'a>,
|
||||||
embedder_id: u8,
|
embedder_id: u8,
|
||||||
) -> impl Iterator<Item = Result<arroy::Reader<'a, arroy::distances::Angular>>> + 'a {
|
quantized: bool,
|
||||||
|
) -> impl Iterator<Item = Result<ArroyWrapper>> + 'a {
|
||||||
crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| {
|
crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| {
|
||||||
arroy::Reader::open(rtxn, k, self.vector_arroy)
|
let reader = ArroyWrapper::new(self.vector_arroy, k, quantized);
|
||||||
.map(Some)
|
// Here we don't care about the dimensions, but we want to know if we can read
|
||||||
.or_else(|e| match e {
|
// in the database or if its metadata are missing because there is no document with that many vectors.
|
||||||
arroy::Error::MissingMetadata(_) => Ok(None),
|
match reader.dimensions(rtxn) {
|
||||||
e => Err(e.into()),
|
Ok(_) => Some(Ok(reader)),
|
||||||
})
|
Err(arroy::Error::MissingMetadata(_)) => None,
|
||||||
.transpose()
|
Err(e) => Some(Err(e.into())),
|
||||||
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1644,32 +1646,18 @@ impl Index {
|
|||||||
docid: DocumentId,
|
docid: DocumentId,
|
||||||
) -> Result<BTreeMap<String, Vec<Embedding>>> {
|
) -> Result<BTreeMap<String, Vec<Embedding>>> {
|
||||||
let mut res = BTreeMap::new();
|
let mut res = BTreeMap::new();
|
||||||
for row in self.embedder_category_id.iter(rtxn)? {
|
let embedding_configs = self.embedding_configs(rtxn)?;
|
||||||
let (embedder_name, embedder_id) = row?;
|
for config in embedding_configs {
|
||||||
let embedder_id = (embedder_id as u16) << 8;
|
let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
|
||||||
let mut embeddings = Vec::new();
|
let embeddings = self
|
||||||
'vectors: for i in 0..=u8::MAX {
|
.arroy_readers(rtxn, embedder_id, config.config.quantized())
|
||||||
let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
|
.map_while(|reader| {
|
||||||
.map(Some)
|
reader
|
||||||
.or_else(|e| match e {
|
.and_then(|r| r.item_vector(rtxn, docid).map_err(|e| e.into()))
|
||||||
arroy::Error::MissingMetadata(_) => Ok(None),
|
.transpose()
|
||||||
e => Err(e),
|
|
||||||
})
|
})
|
||||||
.transpose();
|
.collect::<Result<Vec<_>>>()?;
|
||||||
|
res.insert(config.name.to_owned(), embeddings);
|
||||||
let Some(reader) = reader else {
|
|
||||||
break 'vectors;
|
|
||||||
};
|
|
||||||
|
|
||||||
let embedding = reader?.item_vector(rtxn, docid)?;
|
|
||||||
if let Some(embedding) = embedding {
|
|
||||||
embeddings.push(embedding)
|
|
||||||
} else {
|
|
||||||
break 'vectors;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
res.insert(embedder_name.to_owned(), embeddings);
|
|
||||||
}
|
}
|
||||||
Ok(res)
|
Ok(res)
|
||||||
}
|
}
|
||||||
|
@ -190,7 +190,7 @@ impl<'a> Search<'a> {
|
|||||||
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
||||||
};
|
};
|
||||||
// no embedder, no semantic search
|
// no embedder, no semantic search
|
||||||
let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else {
|
let Some(SemanticSearch { vector, embedder_name, embedder, quantized }) = semantic else {
|
||||||
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -212,7 +212,7 @@ impl<'a> Search<'a> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
search.semantic =
|
search.semantic =
|
||||||
Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder });
|
Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder, quantized });
|
||||||
|
|
||||||
// TODO: would be better to have two distinct functions at this point
|
// TODO: would be better to have two distinct functions at this point
|
||||||
let vector_results = search.execute()?;
|
let vector_results = search.execute()?;
|
||||||
|
@ -32,6 +32,7 @@ pub struct SemanticSearch {
|
|||||||
vector: Option<Vec<f32>>,
|
vector: Option<Vec<f32>>,
|
||||||
embedder_name: String,
|
embedder_name: String,
|
||||||
embedder: Arc<Embedder>,
|
embedder: Arc<Embedder>,
|
||||||
|
quantized: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Search<'a> {
|
pub struct Search<'a> {
|
||||||
@ -89,9 +90,10 @@ impl<'a> Search<'a> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
embedder_name: String,
|
embedder_name: String,
|
||||||
embedder: Arc<Embedder>,
|
embedder: Arc<Embedder>,
|
||||||
|
quantized: bool,
|
||||||
vector: Option<Vec<f32>>,
|
vector: Option<Vec<f32>>,
|
||||||
) -> &mut Search<'a> {
|
) -> &mut Search<'a> {
|
||||||
self.semantic = Some(SemanticSearch { embedder_name, embedder, vector });
|
self.semantic = Some(SemanticSearch { embedder_name, embedder, quantized, vector });
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -206,7 +208,7 @@ impl<'a> Search<'a> {
|
|||||||
degraded,
|
degraded,
|
||||||
used_negative_operator,
|
used_negative_operator,
|
||||||
} = match self.semantic.as_ref() {
|
} = match self.semantic.as_ref() {
|
||||||
Some(SemanticSearch { vector: Some(vector), embedder_name, embedder }) => {
|
Some(SemanticSearch { vector: Some(vector), embedder_name, embedder, quantized }) => {
|
||||||
execute_vector_search(
|
execute_vector_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
vector,
|
vector,
|
||||||
@ -219,6 +221,7 @@ impl<'a> Search<'a> {
|
|||||||
self.limit,
|
self.limit,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
embedder,
|
embedder,
|
||||||
|
*quantized,
|
||||||
self.time_budget.clone(),
|
self.time_budget.clone(),
|
||||||
self.ranking_score_threshold,
|
self.ranking_score_threshold,
|
||||||
)?
|
)?
|
||||||
|
@ -312,6 +312,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
|
|||||||
Ok(ranking_rules)
|
Ok(ranking_rules)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
fn get_ranking_rules_for_vector<'ctx>(
|
fn get_ranking_rules_for_vector<'ctx>(
|
||||||
ctx: &SearchContext<'ctx>,
|
ctx: &SearchContext<'ctx>,
|
||||||
sort_criteria: &Option<Vec<AscDesc>>,
|
sort_criteria: &Option<Vec<AscDesc>>,
|
||||||
@ -320,6 +321,7 @@ fn get_ranking_rules_for_vector<'ctx>(
|
|||||||
target: &[f32],
|
target: &[f32],
|
||||||
embedder_name: &str,
|
embedder_name: &str,
|
||||||
embedder: &Embedder,
|
embedder: &Embedder,
|
||||||
|
quantized: bool,
|
||||||
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
|
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
|
||||||
// query graph search
|
// query graph search
|
||||||
|
|
||||||
@ -347,6 +349,7 @@ fn get_ranking_rules_for_vector<'ctx>(
|
|||||||
limit_plus_offset,
|
limit_plus_offset,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
embedder,
|
embedder,
|
||||||
|
quantized,
|
||||||
)?;
|
)?;
|
||||||
ranking_rules.push(Box::new(vector_sort));
|
ranking_rules.push(Box::new(vector_sort));
|
||||||
vector = true;
|
vector = true;
|
||||||
@ -576,6 +579,7 @@ pub fn execute_vector_search(
|
|||||||
length: usize,
|
length: usize,
|
||||||
embedder_name: &str,
|
embedder_name: &str,
|
||||||
embedder: &Embedder,
|
embedder: &Embedder,
|
||||||
|
quantized: bool,
|
||||||
time_budget: TimeBudget,
|
time_budget: TimeBudget,
|
||||||
ranking_score_threshold: Option<f64>,
|
ranking_score_threshold: Option<f64>,
|
||||||
) -> Result<PartialSearchResult> {
|
) -> Result<PartialSearchResult> {
|
||||||
@ -591,6 +595,7 @@ pub fn execute_vector_search(
|
|||||||
vector,
|
vector,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
embedder,
|
embedder,
|
||||||
|
quantized,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let mut placeholder_search_logger = logger::DefaultSearchLogger;
|
let mut placeholder_search_logger = logger::DefaultSearchLogger;
|
||||||
|
@ -16,6 +16,7 @@ pub struct VectorSort<Q: RankingRuleQueryTrait> {
|
|||||||
limit: usize,
|
limit: usize,
|
||||||
distribution_shift: Option<DistributionShift>,
|
distribution_shift: Option<DistributionShift>,
|
||||||
embedder_index: u8,
|
embedder_index: u8,
|
||||||
|
quantized: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
||||||
@ -26,6 +27,7 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
|||||||
limit: usize,
|
limit: usize,
|
||||||
embedder_name: &str,
|
embedder_name: &str,
|
||||||
embedder: &Embedder,
|
embedder: &Embedder,
|
||||||
|
quantized: bool,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
let embedder_index = ctx
|
let embedder_index = ctx
|
||||||
.index
|
.index
|
||||||
@ -41,6 +43,7 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
|||||||
limit,
|
limit,
|
||||||
distribution_shift: embedder.distribution(),
|
distribution_shift: embedder.distribution(),
|
||||||
embedder_index,
|
embedder_index,
|
||||||
|
quantized,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -49,16 +52,12 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
|||||||
ctx: &mut SearchContext<'_>,
|
ctx: &mut SearchContext<'_>,
|
||||||
vector_candidates: &RoaringBitmap,
|
vector_candidates: &RoaringBitmap,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let readers: std::result::Result<Vec<_>, _> =
|
|
||||||
ctx.index.arroy_readers(ctx.txn, self.embedder_index).collect();
|
|
||||||
let readers = readers?;
|
|
||||||
|
|
||||||
let target = &self.target;
|
let target = &self.target;
|
||||||
let mut results = Vec::new();
|
let mut results = Vec::new();
|
||||||
|
|
||||||
for reader in readers.iter() {
|
for reader in ctx.index.arroy_readers(ctx.txn, self.embedder_index, self.quantized) {
|
||||||
let nns_by_vector =
|
let nns_by_vector =
|
||||||
reader.nns_by_vector(ctx.txn, target, self.limit, None, Some(vector_candidates))?;
|
reader?.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
|
||||||
results.extend(nns_by_vector.into_iter());
|
results.extend(nns_by_vector.into_iter());
|
||||||
}
|
}
|
||||||
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
|
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
|
||||||
|
@ -18,9 +18,11 @@ pub struct Similar<'a> {
|
|||||||
embedder_name: String,
|
embedder_name: String,
|
||||||
embedder: Arc<Embedder>,
|
embedder: Arc<Embedder>,
|
||||||
ranking_score_threshold: Option<f64>,
|
ranking_score_threshold: Option<f64>,
|
||||||
|
quantized: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Similar<'a> {
|
impl<'a> Similar<'a> {
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub fn new(
|
pub fn new(
|
||||||
id: DocumentId,
|
id: DocumentId,
|
||||||
offset: usize,
|
offset: usize,
|
||||||
@ -29,6 +31,7 @@ impl<'a> Similar<'a> {
|
|||||||
rtxn: &'a heed::RoTxn<'a>,
|
rtxn: &'a heed::RoTxn<'a>,
|
||||||
embedder_name: String,
|
embedder_name: String,
|
||||||
embedder: Arc<Embedder>,
|
embedder: Arc<Embedder>,
|
||||||
|
quantized: bool,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
id,
|
id,
|
||||||
@ -40,6 +43,7 @@ impl<'a> Similar<'a> {
|
|||||||
embedder_name,
|
embedder_name,
|
||||||
embedder,
|
embedder,
|
||||||
ranking_score_threshold: None,
|
ranking_score_threshold: None,
|
||||||
|
quantized,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -67,19 +71,13 @@ impl<'a> Similar<'a> {
|
|||||||
.get(self.rtxn, &self.embedder_name)?
|
.get(self.rtxn, &self.embedder_name)?
|
||||||
.ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?;
|
.ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?;
|
||||||
|
|
||||||
let readers: std::result::Result<Vec<_>, _> =
|
|
||||||
self.index.arroy_readers(self.rtxn, embedder_index).collect();
|
|
||||||
|
|
||||||
let readers = readers?;
|
|
||||||
|
|
||||||
let mut results = Vec::new();
|
let mut results = Vec::new();
|
||||||
|
|
||||||
for reader in readers.iter() {
|
for reader in self.index.arroy_readers(self.rtxn, embedder_index, self.quantized) {
|
||||||
let nns_by_item = reader.nns_by_item(
|
let nns_by_item = reader?.nns_by_item(
|
||||||
self.rtxn,
|
self.rtxn,
|
||||||
self.id,
|
self.id,
|
||||||
self.limit + self.offset + 1,
|
self.limit + self.offset + 1,
|
||||||
None,
|
|
||||||
Some(&universe),
|
Some(&universe),
|
||||||
)?;
|
)?;
|
||||||
if let Some(mut nns_by_item) = nns_by_item {
|
if let Some(mut nns_by_item) = nns_by_item {
|
||||||
|
@ -20,7 +20,7 @@ use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
|||||||
use crate::update::settings::InnerIndexSettingsDiff;
|
use crate::update::settings::InnerIndexSettingsDiff;
|
||||||
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
|
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
|
||||||
use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME};
|
use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME};
|
||||||
use crate::vector::settings::{EmbedderAction, ReindexAction};
|
use crate::vector::settings::ReindexAction;
|
||||||
use crate::vector::{Embedder, Embeddings};
|
use crate::vector::{Embedder, Embeddings};
|
||||||
use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort};
|
use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort};
|
||||||
|
|
||||||
@ -208,10 +208,9 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
|
|
||||||
if reindex_vectors {
|
if reindex_vectors {
|
||||||
for (name, action) in settings_diff.embedding_config_updates.iter() {
|
for (name, action) in settings_diff.embedding_config_updates.iter() {
|
||||||
match action {
|
if let Some(action) = action.reindex() {
|
||||||
EmbedderAction::WriteBackToDocuments(_) => continue, // already deleted
|
let Some((embedder_name, (embedder, prompt, _quantized))) =
|
||||||
EmbedderAction::Reindex(action) => {
|
configs.remove_entry(name)
|
||||||
let Some((embedder_name, (embedder, prompt))) = configs.remove_entry(name)
|
|
||||||
else {
|
else {
|
||||||
tracing::error!(embedder = name, "Requested embedder config not found");
|
tracing::error!(embedder = name, "Requested embedder config not found");
|
||||||
continue;
|
continue;
|
||||||
@ -241,7 +240,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
let action = match action {
|
let action = match action {
|
||||||
ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex,
|
ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex,
|
||||||
ReindexAction::RegeneratePrompts => {
|
ReindexAction::RegeneratePrompts => {
|
||||||
let Some((_, old_prompt)) = old_configs.get(name) else {
|
let Some((_, old_prompt, _quantized)) = old_configs.get(name) else {
|
||||||
tracing::error!(embedder = name, "Old embedder config not found");
|
tracing::error!(embedder = name, "Old embedder config not found");
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
@ -260,13 +259,14 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
add_to_user_provided: RoaringBitmap::new(),
|
add_to_user_provided: RoaringBitmap::new(),
|
||||||
action,
|
action,
|
||||||
});
|
});
|
||||||
}
|
} else {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// document operation
|
// document operation
|
||||||
|
|
||||||
for (embedder_name, (embedder, prompt)) in configs.into_iter() {
|
for (embedder_name, (embedder, prompt, _quantized)) in configs.into_iter() {
|
||||||
// (docid, _index) -> KvWriterDelAdd -> Vector
|
// (docid, _index) -> KvWriterDelAdd -> Vector
|
||||||
let manual_vectors_writer = create_writer(
|
let manual_vectors_writer = create_writer(
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
|
@ -43,7 +43,7 @@ use crate::update::index_documents::parallel::ImmutableObkvs;
|
|||||||
use crate::update::{
|
use crate::update::{
|
||||||
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
|
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
|
||||||
};
|
};
|
||||||
use crate::vector::EmbeddingConfigs;
|
use crate::vector::{ArroyWrapper, EmbeddingConfigs};
|
||||||
use crate::{CboRoaringBitmapCodec, Index, Object, Result};
|
use crate::{CboRoaringBitmapCodec, Index, Object, Result};
|
||||||
|
|
||||||
static MERGED_DATABASE_COUNT: usize = 7;
|
static MERGED_DATABASE_COUNT: usize = 7;
|
||||||
@ -679,6 +679,24 @@ where
|
|||||||
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
||||||
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
|
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
|
||||||
|
|
||||||
|
// If an embedder wasn't used in the typedchunk but must be binary quantized
|
||||||
|
// we should insert it in `dimension`
|
||||||
|
for (name, action) in settings_diff.embedding_config_updates.iter() {
|
||||||
|
if action.is_being_quantized && !dimension.contains_key(name.as_str()) {
|
||||||
|
let index = self.index.embedder_category_id.get(self.wtxn, name)?.ok_or(
|
||||||
|
InternalError::DatabaseMissingEntry {
|
||||||
|
db_name: "embedder_category_id",
|
||||||
|
key: None,
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap();
|
||||||
|
let reader =
|
||||||
|
ArroyWrapper::new(self.index.vector_arroy, first_id, action.was_quantized);
|
||||||
|
let dim = reader.dimensions(self.wtxn)?;
|
||||||
|
dimension.insert(name.to_string(), dim);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (embedder_name, dimension) in dimension {
|
for (embedder_name, dimension) in dimension {
|
||||||
let wtxn = &mut *self.wtxn;
|
let wtxn = &mut *self.wtxn;
|
||||||
let vector_arroy = self.index.vector_arroy;
|
let vector_arroy = self.index.vector_arroy;
|
||||||
@ -686,13 +704,23 @@ where
|
|||||||
let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
|
let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
|
||||||
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
|
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
|
||||||
)?;
|
)?;
|
||||||
|
let embedder_config = settings_diff.embedding_config_updates.get(&embedder_name);
|
||||||
|
let was_quantized = settings_diff
|
||||||
|
.old
|
||||||
|
.embedding_configs
|
||||||
|
.get(&embedder_name)
|
||||||
|
.map_or(false, |conf| conf.2);
|
||||||
|
let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized);
|
||||||
|
|
||||||
pool.install(|| {
|
pool.install(|| {
|
||||||
for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
|
for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
|
||||||
let writer = arroy::Writer::new(vector_arroy, k, dimension);
|
let mut writer = ArroyWrapper::new(vector_arroy, k, was_quantized);
|
||||||
if writer.need_build(wtxn)? {
|
if is_quantizing {
|
||||||
writer.build(wtxn, &mut rng, None)?;
|
writer.quantize(wtxn, k, dimension)?;
|
||||||
} else if writer.is_empty(wtxn)? {
|
}
|
||||||
|
if writer.need_build(wtxn, dimension)? {
|
||||||
|
writer.build(wtxn, &mut rng, dimension)?;
|
||||||
|
} else if writer.is_empty(wtxn, dimension)? {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2746,6 +2774,7 @@ mod tests {
|
|||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
distribution: Setting::NotSet,
|
distribution: Setting::NotSet,
|
||||||
headers: Setting::NotSet,
|
headers: Setting::NotSet,
|
||||||
|
binary_quantized: Setting::NotSet,
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
settings.set_embedder_settings(embedders);
|
settings.set_embedder_settings(embedders);
|
||||||
@ -2774,7 +2803,7 @@ mod tests {
|
|||||||
std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap());
|
std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap());
|
||||||
let res = index
|
let res = index
|
||||||
.search(&rtxn)
|
.search(&rtxn)
|
||||||
.semantic(embedder_name, embedder, Some([0.0, 1.0, 2.0].to_vec()))
|
.semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec()))
|
||||||
.execute()
|
.execute()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(res.documents_ids.len(), 3);
|
assert_eq!(res.documents_ids.len(), 3);
|
||||||
|
@ -28,7 +28,8 @@ use crate::update::index_documents::GrenadParameters;
|
|||||||
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
|
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
|
||||||
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
|
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
|
||||||
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||||
use crate::vector::settings::{EmbedderAction, WriteBackToDocuments};
|
use crate::vector::settings::WriteBackToDocuments;
|
||||||
|
use crate::vector::ArroyWrapper;
|
||||||
use crate::{
|
use crate::{
|
||||||
is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
|
is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
|
||||||
};
|
};
|
||||||
@ -989,19 +990,17 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
let readers: Result<
|
let readers: Result<BTreeMap<&str, (Vec<ArroyWrapper>, &RoaringBitmap)>> = settings_diff
|
||||||
BTreeMap<&str, (Vec<arroy::Reader<'_, arroy::distances::Angular>>, &RoaringBitmap)>,
|
|
||||||
> = settings_diff
|
|
||||||
.embedding_config_updates
|
.embedding_config_updates
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|(name, action)| {
|
.filter_map(|(name, action)| {
|
||||||
if let EmbedderAction::WriteBackToDocuments(WriteBackToDocuments {
|
if let Some(WriteBackToDocuments { embedder_id, user_provided }) =
|
||||||
embedder_id,
|
action.write_back()
|
||||||
user_provided,
|
|
||||||
}) = action
|
|
||||||
{
|
{
|
||||||
let readers: Result<Vec<_>> =
|
let readers: Result<Vec<_>> = self
|
||||||
self.index.arroy_readers(wtxn, *embedder_id).collect();
|
.index
|
||||||
|
.arroy_readers(wtxn, *embedder_id, action.was_quantized)
|
||||||
|
.collect();
|
||||||
match readers {
|
match readers {
|
||||||
Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))),
|
Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))),
|
||||||
Err(error) => Some(Err(error)),
|
Err(error) => Some(Err(error)),
|
||||||
@ -1104,23 +1103,14 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut writers = Vec::new();
|
|
||||||
|
|
||||||
// delete all vectors from the embedders that need removal
|
// delete all vectors from the embedders that need removal
|
||||||
for (_, (readers, _)) in readers {
|
for (_, (readers, _)) in readers {
|
||||||
for reader in readers {
|
for reader in readers {
|
||||||
let dimensions = reader.dimensions();
|
let dimensions = reader.dimensions(wtxn)?;
|
||||||
let arroy_index = reader.index();
|
reader.clear(wtxn, dimensions)?;
|
||||||
drop(reader);
|
|
||||||
let writer = arroy::Writer::new(self.index.vector_arroy, arroy_index, dimensions);
|
|
||||||
writers.push(writer);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for writer in writers {
|
|
||||||
writer.clear(wtxn)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let grenad_params = GrenadParameters {
|
let grenad_params = GrenadParameters {
|
||||||
chunk_compression_type: self.indexer_settings.chunk_compression_type,
|
chunk_compression_type: self.indexer_settings.chunk_compression_type,
|
||||||
chunk_compression_level: self.indexer_settings.chunk_compression_level,
|
chunk_compression_level: self.indexer_settings.chunk_compression_level,
|
||||||
|
@ -27,6 +27,7 @@ use crate::update::index_documents::helpers::{
|
|||||||
as_cloneable_grenad, keep_latest_obkv, try_split_array_at,
|
as_cloneable_grenad, keep_latest_obkv, try_split_array_at,
|
||||||
};
|
};
|
||||||
use crate::update::settings::InnerIndexSettingsDiff;
|
use crate::update::settings::InnerIndexSettingsDiff;
|
||||||
|
use crate::vector::ArroyWrapper;
|
||||||
use crate::{
|
use crate::{
|
||||||
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
|
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
|
||||||
Result, SerializationError, U8StrStrCodec,
|
Result, SerializationError, U8StrStrCodec,
|
||||||
@ -666,9 +667,14 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
|
let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
|
||||||
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
|
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
|
||||||
)?;
|
)?;
|
||||||
|
let binary_quantized = settings_diff
|
||||||
|
.old
|
||||||
|
.embedding_configs
|
||||||
|
.get(&embedder_name)
|
||||||
|
.map_or(false, |conf| conf.2);
|
||||||
// FIXME: allow customizing distance
|
// FIXME: allow customizing distance
|
||||||
let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index)
|
let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index)
|
||||||
.map(|k| arroy::Writer::new(index.vector_arroy, k, expected_dimension))
|
.map(|k| ArroyWrapper::new(index.vector_arroy, k, binary_quantized))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
// remove vectors for docids we want them removed
|
// remove vectors for docids we want them removed
|
||||||
@ -679,7 +685,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
|
|
||||||
for writer in &writers {
|
for writer in &writers {
|
||||||
// Uses invariant: vectors are packed in the first writers.
|
// Uses invariant: vectors are packed in the first writers.
|
||||||
if !writer.del_item(wtxn, docid)? {
|
if !writer.del_item(wtxn, expected_dimension, docid)? {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -711,7 +717,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
for (embedding, writer) in embeddings.iter().zip(&writers) {
|
for (embedding, writer) in embeddings.iter().zip(&writers) {
|
||||||
writer.add_item(wtxn, docid, embedding)?;
|
writer.add_item(wtxn, expected_dimension, docid, embedding)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -734,7 +740,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
break;
|
break;
|
||||||
};
|
};
|
||||||
if candidate == vector {
|
if candidate == vector {
|
||||||
writer.del_item(wtxn, docid)?;
|
writer.del_item(wtxn, expected_dimension, docid)?;
|
||||||
deleted_index = Some(index);
|
deleted_index = Some(index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -751,8 +757,13 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
if let Some((last_index, vector)) = last_index_with_a_vector {
|
if let Some((last_index, vector)) = last_index_with_a_vector {
|
||||||
// unwrap: computed the index from the list of writers
|
// unwrap: computed the index from the list of writers
|
||||||
let writer = writers.get(last_index).unwrap();
|
let writer = writers.get(last_index).unwrap();
|
||||||
writer.del_item(wtxn, docid)?;
|
writer.del_item(wtxn, expected_dimension, docid)?;
|
||||||
writers.get(deleted_index).unwrap().add_item(wtxn, docid, &vector)?;
|
writers.get(deleted_index).unwrap().add_item(
|
||||||
|
wtxn,
|
||||||
|
expected_dimension,
|
||||||
|
docid,
|
||||||
|
&vector,
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -762,8 +773,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
|
|
||||||
// overflow was detected during vector extraction.
|
// overflow was detected during vector extraction.
|
||||||
for writer in &writers {
|
for writer in &writers {
|
||||||
if !writer.contains_item(wtxn, docid)? {
|
if !writer.contains_item(wtxn, expected_dimension, docid)? {
|
||||||
writer.add_item(wtxn, docid, &vector)?;
|
writer.add_item(wtxn, expected_dimension, docid, &vector)?;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -954,7 +954,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
let old_configs = self.index.embedding_configs(self.wtxn)?;
|
let old_configs = self.index.embedding_configs(self.wtxn)?;
|
||||||
let remove_all: Result<BTreeMap<String, EmbedderAction>> = old_configs
|
let remove_all: Result<BTreeMap<String, EmbedderAction>> = old_configs
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|IndexEmbeddingConfig { name, config: _, user_provided }| -> Result<_> {
|
.map(|IndexEmbeddingConfig { name, config, user_provided }| -> Result<_> {
|
||||||
let embedder_id =
|
let embedder_id =
|
||||||
self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or(
|
self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or(
|
||||||
crate::InternalError::DatabaseMissingEntry {
|
crate::InternalError::DatabaseMissingEntry {
|
||||||
@ -964,10 +964,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
)?;
|
)?;
|
||||||
Ok((
|
Ok((
|
||||||
name,
|
name,
|
||||||
EmbedderAction::WriteBackToDocuments(WriteBackToDocuments {
|
EmbedderAction::with_write_back(
|
||||||
embedder_id,
|
WriteBackToDocuments { embedder_id, user_provided },
|
||||||
user_provided,
|
config.quantized(),
|
||||||
}),
|
),
|
||||||
))
|
))
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
@ -1004,7 +1004,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
match joined {
|
match joined {
|
||||||
// updated config
|
// updated config
|
||||||
EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => {
|
EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => {
|
||||||
let settings_diff = SettingsDiff::from_settings(old, new);
|
let was_quantized = old.binary_quantized.set().unwrap_or_default();
|
||||||
|
let settings_diff = SettingsDiff::from_settings(&name, old, new)?;
|
||||||
match settings_diff {
|
match settings_diff {
|
||||||
SettingsDiff::Remove => {
|
SettingsDiff::Remove => {
|
||||||
tracing::debug!(
|
tracing::debug!(
|
||||||
@ -1023,25 +1024,29 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
self.index.embedder_category_id.delete(self.wtxn, &name)?;
|
self.index.embedder_category_id.delete(self.wtxn, &name)?;
|
||||||
embedder_actions.insert(
|
embedder_actions.insert(
|
||||||
name,
|
name,
|
||||||
EmbedderAction::WriteBackToDocuments(WriteBackToDocuments {
|
EmbedderAction::with_write_back(
|
||||||
embedder_id,
|
WriteBackToDocuments { embedder_id, user_provided },
|
||||||
user_provided,
|
was_quantized,
|
||||||
}),
|
),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
SettingsDiff::Reindex { action, updated_settings } => {
|
SettingsDiff::Reindex { action, updated_settings, quantize } => {
|
||||||
tracing::debug!(
|
tracing::debug!(
|
||||||
embedder = name,
|
embedder = name,
|
||||||
user_provided = user_provided.len(),
|
user_provided = user_provided.len(),
|
||||||
?action,
|
?action,
|
||||||
"reindex embedder"
|
"reindex embedder"
|
||||||
);
|
);
|
||||||
embedder_actions.insert(name.clone(), EmbedderAction::Reindex(action));
|
embedder_actions.insert(
|
||||||
|
name.clone(),
|
||||||
|
EmbedderAction::with_reindex(action, was_quantized)
|
||||||
|
.with_is_being_quantized(quantize),
|
||||||
|
);
|
||||||
let new =
|
let new =
|
||||||
validate_embedding_settings(Setting::Set(updated_settings), &name)?;
|
validate_embedding_settings(Setting::Set(updated_settings), &name)?;
|
||||||
updated_configs.insert(name, (new, user_provided));
|
updated_configs.insert(name, (new, user_provided));
|
||||||
}
|
}
|
||||||
SettingsDiff::UpdateWithoutReindex { updated_settings } => {
|
SettingsDiff::UpdateWithoutReindex { updated_settings, quantize } => {
|
||||||
tracing::debug!(
|
tracing::debug!(
|
||||||
embedder = name,
|
embedder = name,
|
||||||
user_provided = user_provided.len(),
|
user_provided = user_provided.len(),
|
||||||
@ -1049,6 +1054,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
);
|
);
|
||||||
let new =
|
let new =
|
||||||
validate_embedding_settings(Setting::Set(updated_settings), &name)?;
|
validate_embedding_settings(Setting::Set(updated_settings), &name)?;
|
||||||
|
if quantize {
|
||||||
|
embedder_actions.insert(
|
||||||
|
name.clone(),
|
||||||
|
EmbedderAction::default().with_is_being_quantized(true),
|
||||||
|
);
|
||||||
|
}
|
||||||
updated_configs.insert(name, (new, user_provided));
|
updated_configs.insert(name, (new, user_provided));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1067,8 +1078,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
&mut setting,
|
&mut setting,
|
||||||
);
|
);
|
||||||
let setting = validate_embedding_settings(setting, &name)?;
|
let setting = validate_embedding_settings(setting, &name)?;
|
||||||
embedder_actions
|
embedder_actions.insert(
|
||||||
.insert(name.clone(), EmbedderAction::Reindex(ReindexAction::FullReindex));
|
name.clone(),
|
||||||
|
EmbedderAction::with_reindex(ReindexAction::FullReindex, false),
|
||||||
|
);
|
||||||
updated_configs.insert(name, (setting, RoaringBitmap::new()));
|
updated_configs.insert(name, (setting, RoaringBitmap::new()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1082,21 +1095,16 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
let mut find_free_index =
|
let mut find_free_index =
|
||||||
move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8);
|
move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8);
|
||||||
for (name, action) in embedder_actions.iter() {
|
for (name, action) in embedder_actions.iter() {
|
||||||
match action {
|
// ignore actions that are not possible for a new embedder
|
||||||
EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => {
|
if matches!(action.reindex(), Some(ReindexAction::FullReindex))
|
||||||
/* cannot be a new embedder, so has to have an id already */
|
&& self.index.embedder_category_id.get(self.wtxn, name)?.is_none()
|
||||||
}
|
{
|
||||||
EmbedderAction::Reindex(ReindexAction::FullReindex) => {
|
let id =
|
||||||
if self.index.embedder_category_id.get(self.wtxn, name)?.is_none() {
|
find_free_index().ok_or(UserError::TooManyEmbedders(updated_configs.len()))?;
|
||||||
let id = find_free_index()
|
|
||||||
.ok_or(UserError::TooManyEmbedders(updated_configs.len()))?;
|
|
||||||
tracing::debug!(embedder = name, id, "assigning free id to new embedder");
|
tracing::debug!(embedder = name, id, "assigning free id to new embedder");
|
||||||
self.index.embedder_category_id.put(self.wtxn, name, &id)?;
|
self.index.embedder_category_id.put(self.wtxn, name, &id)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EmbedderAction::WriteBackToDocuments(_) => { /* already removed */ }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let updated_configs: Vec<IndexEmbeddingConfig> = updated_configs
|
let updated_configs: Vec<IndexEmbeddingConfig> = updated_configs
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(|(name, (config, user_provided))| match config {
|
.filter_map(|(name, (config, user_provided))| match config {
|
||||||
@ -1277,7 +1285,11 @@ impl InnerIndexSettingsDiff {
|
|||||||
|
|
||||||
// if the user-defined searchables changed, then we need to reindex prompts.
|
// if the user-defined searchables changed, then we need to reindex prompts.
|
||||||
if cache_user_defined_searchables {
|
if cache_user_defined_searchables {
|
||||||
for (embedder_name, (config, _)) in new_settings.embedding_configs.inner_as_ref() {
|
for (embedder_name, (config, _, _quantized)) in
|
||||||
|
new_settings.embedding_configs.inner_as_ref()
|
||||||
|
{
|
||||||
|
let was_quantized =
|
||||||
|
old_settings.embedding_configs.get(embedder_name).map_or(false, |conf| conf.2);
|
||||||
// skip embedders that don't use document templates
|
// skip embedders that don't use document templates
|
||||||
if !config.uses_document_template() {
|
if !config.uses_document_template() {
|
||||||
continue;
|
continue;
|
||||||
@ -1287,16 +1299,19 @@ impl InnerIndexSettingsDiff {
|
|||||||
// this always makes the code clearer by explicitly handling the cases
|
// this always makes the code clearer by explicitly handling the cases
|
||||||
match embedding_config_updates.entry(embedder_name.clone()) {
|
match embedding_config_updates.entry(embedder_name.clone()) {
|
||||||
std::collections::btree_map::Entry::Vacant(entry) => {
|
std::collections::btree_map::Entry::Vacant(entry) => {
|
||||||
entry.insert(EmbedderAction::Reindex(ReindexAction::RegeneratePrompts));
|
entry.insert(EmbedderAction::with_reindex(
|
||||||
|
ReindexAction::RegeneratePrompts,
|
||||||
|
was_quantized,
|
||||||
|
));
|
||||||
}
|
}
|
||||||
std::collections::btree_map::Entry::Occupied(entry) => match entry.get() {
|
std::collections::btree_map::Entry::Occupied(entry) => {
|
||||||
EmbedderAction::WriteBackToDocuments(_) => { /* we are deleting this embedder, so no point in regeneration */
|
let EmbedderAction {
|
||||||
|
was_quantized: _,
|
||||||
|
is_being_quantized: _,
|
||||||
|
write_back: _, // We are deleting this embedder, so no point in regeneration
|
||||||
|
reindex: _, // We are already fully reindexing
|
||||||
|
} = entry.get();
|
||||||
}
|
}
|
||||||
EmbedderAction::Reindex(ReindexAction::FullReindex) => { /* we are already fully reindexing */
|
|
||||||
}
|
|
||||||
EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => { /* we are already regenerating prompts */
|
|
||||||
}
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1546,7 +1561,7 @@ fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingCo
|
|||||||
.map(
|
.map(
|
||||||
|IndexEmbeddingConfig {
|
|IndexEmbeddingConfig {
|
||||||
name,
|
name,
|
||||||
config: EmbeddingConfig { embedder_options, prompt },
|
config: EmbeddingConfig { embedder_options, prompt, quantized },
|
||||||
..
|
..
|
||||||
}| {
|
}| {
|
||||||
let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?);
|
let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?);
|
||||||
@ -1556,7 +1571,7 @@ fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingCo
|
|||||||
.map_err(crate::vector::Error::from)
|
.map_err(crate::vector::Error::from)
|
||||||
.map_err(crate::Error::from)?,
|
.map_err(crate::Error::from)?,
|
||||||
);
|
);
|
||||||
Ok((name, (embedder, prompt)))
|
Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.collect();
|
.collect();
|
||||||
@ -1581,6 +1596,7 @@ fn validate_prompt(
|
|||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
|
binary_quantized: binary_quantize,
|
||||||
}) => {
|
}) => {
|
||||||
let max_bytes = match document_template_max_bytes.set() {
|
let max_bytes = match document_template_max_bytes.set() {
|
||||||
Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| {
|
Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| {
|
||||||
@ -1613,6 +1629,7 @@ fn validate_prompt(
|
|||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
|
binary_quantized: binary_quantize,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
new => Ok(new),
|
new => Ok(new),
|
||||||
@ -1638,6 +1655,7 @@ pub fn validate_embedding_settings(
|
|||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
|
binary_quantized: binary_quantize,
|
||||||
} = settings;
|
} = settings;
|
||||||
|
|
||||||
if let Some(0) = dimensions.set() {
|
if let Some(0) = dimensions.set() {
|
||||||
@ -1678,6 +1696,7 @@ pub fn validate_embedding_settings(
|
|||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
|
binary_quantized: binary_quantize,
|
||||||
}));
|
}));
|
||||||
};
|
};
|
||||||
match inferred_source {
|
match inferred_source {
|
||||||
@ -1779,6 +1798,7 @@ pub fn validate_embedding_settings(
|
|||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
|
binary_quantized: binary_quantize,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,8 +1,12 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use arroy::distances::{Angular, BinaryQuantizedAngular};
|
||||||
|
use arroy::ItemId;
|
||||||
use deserr::{DeserializeError, Deserr};
|
use deserr::{DeserializeError, Deserr};
|
||||||
|
use heed::{RoTxn, RwTxn, Unspecified};
|
||||||
use ordered_float::OrderedFloat;
|
use ordered_float::OrderedFloat;
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use self::error::{EmbedError, NewEmbedderError};
|
use self::error::{EmbedError, NewEmbedderError};
|
||||||
@ -26,6 +30,171 @@ pub type Embedding = Vec<f32>;
|
|||||||
|
|
||||||
pub const REQUEST_PARALLELISM: usize = 40;
|
pub const REQUEST_PARALLELISM: usize = 40;
|
||||||
|
|
||||||
|
pub struct ArroyWrapper {
|
||||||
|
quantized: bool,
|
||||||
|
index: u16,
|
||||||
|
database: arroy::Database<Unspecified>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ArroyWrapper {
|
||||||
|
pub fn new(database: arroy::Database<Unspecified>, index: u16, quantized: bool) -> Self {
|
||||||
|
Self { database, index, quantized }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn index(&self) -> u16 {
|
||||||
|
self.index
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn dimensions(&self, rtxn: &RoTxn) -> Result<usize, arroy::Error> {
|
||||||
|
if self.quantized {
|
||||||
|
Ok(arroy::Reader::open(rtxn, self.index, self.quantized_db())?.dimensions())
|
||||||
|
} else {
|
||||||
|
Ok(arroy::Reader::open(rtxn, self.index, self.angular_db())?.dimensions())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn quantize(
|
||||||
|
&mut self,
|
||||||
|
wtxn: &mut RwTxn,
|
||||||
|
index: u16,
|
||||||
|
dimension: usize,
|
||||||
|
) -> Result<(), arroy::Error> {
|
||||||
|
if !self.quantized {
|
||||||
|
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
|
||||||
|
writer.prepare_changing_distance::<BinaryQuantizedAngular>(wtxn)?;
|
||||||
|
self.quantized = true;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
|
||||||
|
if self.quantized {
|
||||||
|
arroy::Writer::new(self.quantized_db(), self.index, dimension).need_build(rtxn)
|
||||||
|
} else {
|
||||||
|
arroy::Writer::new(self.angular_db(), self.index, dimension).need_build(rtxn)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn build<R: rand::Rng + rand::SeedableRng>(
|
||||||
|
&self,
|
||||||
|
wtxn: &mut RwTxn,
|
||||||
|
rng: &mut R,
|
||||||
|
dimension: usize,
|
||||||
|
) -> Result<(), arroy::Error> {
|
||||||
|
if self.quantized {
|
||||||
|
arroy::Writer::new(self.quantized_db(), self.index, dimension).build(wtxn, rng, None)
|
||||||
|
} else {
|
||||||
|
arroy::Writer::new(self.angular_db(), self.index, dimension).build(wtxn, rng, None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_item(
|
||||||
|
&self,
|
||||||
|
wtxn: &mut RwTxn,
|
||||||
|
dimension: usize,
|
||||||
|
item_id: arroy::ItemId,
|
||||||
|
vector: &[f32],
|
||||||
|
) -> Result<(), arroy::Error> {
|
||||||
|
if self.quantized {
|
||||||
|
arroy::Writer::new(self.quantized_db(), self.index, dimension)
|
||||||
|
.add_item(wtxn, item_id, vector)
|
||||||
|
} else {
|
||||||
|
arroy::Writer::new(self.angular_db(), self.index, dimension)
|
||||||
|
.add_item(wtxn, item_id, vector)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn del_item(
|
||||||
|
&self,
|
||||||
|
wtxn: &mut RwTxn,
|
||||||
|
dimension: usize,
|
||||||
|
item_id: arroy::ItemId,
|
||||||
|
) -> Result<bool, arroy::Error> {
|
||||||
|
if self.quantized {
|
||||||
|
arroy::Writer::new(self.quantized_db(), self.index, dimension).del_item(wtxn, item_id)
|
||||||
|
} else {
|
||||||
|
arroy::Writer::new(self.angular_db(), self.index, dimension).del_item(wtxn, item_id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> {
|
||||||
|
if self.quantized {
|
||||||
|
arroy::Writer::new(self.quantized_db(), self.index, dimension).clear(wtxn)
|
||||||
|
} else {
|
||||||
|
arroy::Writer::new(self.angular_db(), self.index, dimension).clear(wtxn)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
|
||||||
|
if self.quantized {
|
||||||
|
arroy::Writer::new(self.quantized_db(), self.index, dimension).is_empty(rtxn)
|
||||||
|
} else {
|
||||||
|
arroy::Writer::new(self.angular_db(), self.index, dimension).is_empty(rtxn)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn contains_item(
|
||||||
|
&self,
|
||||||
|
rtxn: &RoTxn,
|
||||||
|
dimension: usize,
|
||||||
|
item: arroy::ItemId,
|
||||||
|
) -> Result<bool, arroy::Error> {
|
||||||
|
if self.quantized {
|
||||||
|
arroy::Writer::new(self.quantized_db(), self.index, dimension).contains_item(rtxn, item)
|
||||||
|
} else {
|
||||||
|
arroy::Writer::new(self.angular_db(), self.index, dimension).contains_item(rtxn, item)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn nns_by_item(
|
||||||
|
&self,
|
||||||
|
rtxn: &RoTxn,
|
||||||
|
item: ItemId,
|
||||||
|
limit: usize,
|
||||||
|
filter: Option<&RoaringBitmap>,
|
||||||
|
) -> Result<Option<Vec<(ItemId, f32)>>, arroy::Error> {
|
||||||
|
if self.quantized {
|
||||||
|
arroy::Reader::open(rtxn, self.index, self.quantized_db())?
|
||||||
|
.nns_by_item(rtxn, item, limit, None, None, filter)
|
||||||
|
} else {
|
||||||
|
arroy::Reader::open(rtxn, self.index, self.angular_db())?
|
||||||
|
.nns_by_item(rtxn, item, limit, None, None, filter)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn nns_by_vector(
|
||||||
|
&self,
|
||||||
|
txn: &RoTxn,
|
||||||
|
item: &[f32],
|
||||||
|
limit: usize,
|
||||||
|
filter: Option<&RoaringBitmap>,
|
||||||
|
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
|
||||||
|
if self.quantized {
|
||||||
|
arroy::Reader::open(txn, self.index, self.quantized_db())?
|
||||||
|
.nns_by_vector(txn, item, limit, None, None, filter)
|
||||||
|
} else {
|
||||||
|
arroy::Reader::open(txn, self.index, self.angular_db())?
|
||||||
|
.nns_by_vector(txn, item, limit, None, None, filter)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn item_vector(&self, rtxn: &RoTxn, docid: u32) -> Result<Option<Vec<f32>>, arroy::Error> {
|
||||||
|
if self.quantized {
|
||||||
|
arroy::Reader::open(rtxn, self.index, self.quantized_db())?.item_vector(rtxn, docid)
|
||||||
|
} else {
|
||||||
|
arroy::Reader::open(rtxn, self.index, self.angular_db())?.item_vector(rtxn, docid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn angular_db(&self) -> arroy::Database<Angular> {
|
||||||
|
self.database.remap_data_type()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn quantized_db(&self) -> arroy::Database<BinaryQuantizedAngular> {
|
||||||
|
self.database.remap_data_type()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// One or multiple embeddings stored consecutively in a flat vector.
|
/// One or multiple embeddings stored consecutively in a flat vector.
|
||||||
pub struct Embeddings<F> {
|
pub struct Embeddings<F> {
|
||||||
data: Vec<F>,
|
data: Vec<F>,
|
||||||
@ -124,39 +293,48 @@ pub struct EmbeddingConfig {
|
|||||||
pub embedder_options: EmbedderOptions,
|
pub embedder_options: EmbedderOptions,
|
||||||
/// Document template
|
/// Document template
|
||||||
pub prompt: PromptData,
|
pub prompt: PromptData,
|
||||||
|
/// If this embedder is binary quantized
|
||||||
|
pub quantized: Option<bool>,
|
||||||
// TODO: add metrics and anything needed
|
// TODO: add metrics and anything needed
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl EmbeddingConfig {
|
||||||
|
pub fn quantized(&self) -> bool {
|
||||||
|
self.quantized.unwrap_or_default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Map of embedder configurations.
|
/// Map of embedder configurations.
|
||||||
///
|
///
|
||||||
/// Each configuration is mapped to a name.
|
/// Each configuration is mapped to a name.
|
||||||
#[derive(Clone, Default)]
|
#[derive(Clone, Default)]
|
||||||
pub struct EmbeddingConfigs(HashMap<String, (Arc<Embedder>, Arc<Prompt>)>);
|
pub struct EmbeddingConfigs(HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)>);
|
||||||
|
|
||||||
impl EmbeddingConfigs {
|
impl EmbeddingConfigs {
|
||||||
/// Create the map from its internal component.s
|
/// Create the map from its internal component.s
|
||||||
pub fn new(data: HashMap<String, (Arc<Embedder>, Arc<Prompt>)>) -> Self {
|
pub fn new(data: HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)>) -> Self {
|
||||||
Self(data)
|
Self(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get an embedder configuration and template from its name.
|
/// Get an embedder configuration and template from its name.
|
||||||
pub fn get(&self, name: &str) -> Option<(Arc<Embedder>, Arc<Prompt>)> {
|
pub fn get(&self, name: &str) -> Option<(Arc<Embedder>, Arc<Prompt>, bool)> {
|
||||||
self.0.get(name).cloned()
|
self.0.get(name).cloned()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn inner_as_ref(&self) -> &HashMap<String, (Arc<Embedder>, Arc<Prompt>)> {
|
pub fn inner_as_ref(&self) -> &HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)> {
|
||||||
&self.0
|
&self.0
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn into_inner(self) -> HashMap<String, (Arc<Embedder>, Arc<Prompt>)> {
|
pub fn into_inner(self) -> HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)> {
|
||||||
self.0
|
self.0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl IntoIterator for EmbeddingConfigs {
|
impl IntoIterator for EmbeddingConfigs {
|
||||||
type Item = (String, (Arc<Embedder>, Arc<Prompt>));
|
type Item = (String, (Arc<Embedder>, Arc<Prompt>, bool));
|
||||||
|
|
||||||
type IntoIter = std::collections::hash_map::IntoIter<String, (Arc<Embedder>, Arc<Prompt>)>;
|
type IntoIter =
|
||||||
|
std::collections::hash_map::IntoIter<String, (Arc<Embedder>, Arc<Prompt>, bool)>;
|
||||||
|
|
||||||
fn into_iter(self) -> Self::IntoIter {
|
fn into_iter(self) -> Self::IntoIter {
|
||||||
self.0.into_iter()
|
self.0.into_iter()
|
||||||
|
@ -32,6 +32,9 @@ pub struct EmbeddingSettings {
|
|||||||
pub dimensions: Setting<usize>,
|
pub dimensions: Setting<usize>,
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
#[deserr(default)]
|
#[deserr(default)]
|
||||||
|
pub binary_quantized: Setting<bool>,
|
||||||
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
|
#[deserr(default)]
|
||||||
pub document_template: Setting<String>,
|
pub document_template: Setting<String>,
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
#[deserr(default)]
|
#[deserr(default)]
|
||||||
@ -85,23 +88,63 @@ pub enum ReindexAction {
|
|||||||
|
|
||||||
pub enum SettingsDiff {
|
pub enum SettingsDiff {
|
||||||
Remove,
|
Remove,
|
||||||
Reindex { action: ReindexAction, updated_settings: EmbeddingSettings },
|
Reindex { action: ReindexAction, updated_settings: EmbeddingSettings, quantize: bool },
|
||||||
UpdateWithoutReindex { updated_settings: EmbeddingSettings },
|
UpdateWithoutReindex { updated_settings: EmbeddingSettings, quantize: bool },
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum EmbedderAction {
|
#[derive(Default, Debug)]
|
||||||
WriteBackToDocuments(WriteBackToDocuments),
|
pub struct EmbedderAction {
|
||||||
Reindex(ReindexAction),
|
pub was_quantized: bool,
|
||||||
|
pub is_being_quantized: bool,
|
||||||
|
pub write_back: Option<WriteBackToDocuments>,
|
||||||
|
pub reindex: Option<ReindexAction>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl EmbedderAction {
|
||||||
|
pub fn is_being_quantized(&self) -> bool {
|
||||||
|
self.is_being_quantized
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn write_back(&self) -> Option<&WriteBackToDocuments> {
|
||||||
|
self.write_back.as_ref()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn reindex(&self) -> Option<&ReindexAction> {
|
||||||
|
self.reindex.as_ref()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_is_being_quantized(mut self, quantize: bool) -> Self {
|
||||||
|
self.is_being_quantized = quantize;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_write_back(write_back: WriteBackToDocuments, was_quantized: bool) -> Self {
|
||||||
|
Self {
|
||||||
|
was_quantized,
|
||||||
|
is_being_quantized: false,
|
||||||
|
write_back: Some(write_back),
|
||||||
|
reindex: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_reindex(reindex: ReindexAction, was_quantized: bool) -> Self {
|
||||||
|
Self { was_quantized, is_being_quantized: false, write_back: None, reindex: Some(reindex) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct WriteBackToDocuments {
|
pub struct WriteBackToDocuments {
|
||||||
pub embedder_id: u8,
|
pub embedder_id: u8,
|
||||||
pub user_provided: RoaringBitmap,
|
pub user_provided: RoaringBitmap,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SettingsDiff {
|
impl SettingsDiff {
|
||||||
pub fn from_settings(old: EmbeddingSettings, new: Setting<EmbeddingSettings>) -> Self {
|
pub fn from_settings(
|
||||||
match new {
|
embedder_name: &str,
|
||||||
|
old: EmbeddingSettings,
|
||||||
|
new: Setting<EmbeddingSettings>,
|
||||||
|
) -> Result<Self, UserError> {
|
||||||
|
let ret = match new {
|
||||||
Setting::Set(new) => {
|
Setting::Set(new) => {
|
||||||
let EmbeddingSettings {
|
let EmbeddingSettings {
|
||||||
mut source,
|
mut source,
|
||||||
@ -116,6 +159,7 @@ impl SettingsDiff {
|
|||||||
mut distribution,
|
mut distribution,
|
||||||
mut headers,
|
mut headers,
|
||||||
mut document_template_max_bytes,
|
mut document_template_max_bytes,
|
||||||
|
binary_quantized: mut binary_quantize,
|
||||||
} = old;
|
} = old;
|
||||||
|
|
||||||
let EmbeddingSettings {
|
let EmbeddingSettings {
|
||||||
@ -131,8 +175,17 @@ impl SettingsDiff {
|
|||||||
distribution: new_distribution,
|
distribution: new_distribution,
|
||||||
headers: new_headers,
|
headers: new_headers,
|
||||||
document_template_max_bytes: new_document_template_max_bytes,
|
document_template_max_bytes: new_document_template_max_bytes,
|
||||||
|
binary_quantized: new_binary_quantize,
|
||||||
} = new;
|
} = new;
|
||||||
|
|
||||||
|
if matches!(binary_quantize, Setting::Set(true))
|
||||||
|
&& matches!(new_binary_quantize, Setting::Set(false))
|
||||||
|
{
|
||||||
|
return Err(UserError::InvalidDisableBinaryQuantization {
|
||||||
|
embedder_name: embedder_name.to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
let mut reindex_action = None;
|
let mut reindex_action = None;
|
||||||
|
|
||||||
// **Warning**: do not use short-circuiting || here, we want all these operations applied
|
// **Warning**: do not use short-circuiting || here, we want all these operations applied
|
||||||
@ -172,6 +225,7 @@ impl SettingsDiff {
|
|||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
let binary_quantize_changed = binary_quantize.apply(new_binary_quantize);
|
||||||
if url.apply(new_url) {
|
if url.apply(new_url) {
|
||||||
match source {
|
match source {
|
||||||
// do not regenerate on an url change in OpenAI
|
// do not regenerate on an url change in OpenAI
|
||||||
@ -231,16 +285,27 @@ impl SettingsDiff {
|
|||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
document_template_max_bytes,
|
document_template_max_bytes,
|
||||||
|
binary_quantized: binary_quantize,
|
||||||
};
|
};
|
||||||
|
|
||||||
match reindex_action {
|
match reindex_action {
|
||||||
Some(action) => Self::Reindex { action, updated_settings },
|
Some(action) => Self::Reindex {
|
||||||
None => Self::UpdateWithoutReindex { updated_settings },
|
action,
|
||||||
|
updated_settings,
|
||||||
|
quantize: binary_quantize_changed,
|
||||||
|
},
|
||||||
|
None => Self::UpdateWithoutReindex {
|
||||||
|
updated_settings,
|
||||||
|
quantize: binary_quantize_changed,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Setting::Reset => Self::Remove,
|
Setting::Reset => Self::Remove,
|
||||||
Setting::NotSet => Self::UpdateWithoutReindex { updated_settings: old },
|
Setting::NotSet => {
|
||||||
|
Self::UpdateWithoutReindex { updated_settings: old, quantize: false }
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
Ok(ret)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -486,7 +551,7 @@ impl std::fmt::Display for EmbedderSource {
|
|||||||
|
|
||||||
impl From<EmbeddingConfig> for EmbeddingSettings {
|
impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||||
fn from(value: EmbeddingConfig) -> Self {
|
fn from(value: EmbeddingConfig) -> Self {
|
||||||
let EmbeddingConfig { embedder_options, prompt } = value;
|
let EmbeddingConfig { embedder_options, prompt, quantized } = value;
|
||||||
let document_template_max_bytes =
|
let document_template_max_bytes =
|
||||||
Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get());
|
Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get());
|
||||||
match embedder_options {
|
match embedder_options {
|
||||||
@ -507,6 +572,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
headers: Setting::NotSet,
|
headers: Setting::NotSet,
|
||||||
distribution: Setting::some_or_not_set(distribution),
|
distribution: Setting::some_or_not_set(distribution),
|
||||||
|
binary_quantized: Setting::some_or_not_set(quantized),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
|
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
|
||||||
url,
|
url,
|
||||||
@ -527,6 +593,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
headers: Setting::NotSet,
|
headers: Setting::NotSet,
|
||||||
distribution: Setting::some_or_not_set(distribution),
|
distribution: Setting::some_or_not_set(distribution),
|
||||||
|
binary_quantized: Setting::some_or_not_set(quantized),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions {
|
super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions {
|
||||||
embedding_model,
|
embedding_model,
|
||||||
@ -547,6 +614,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
headers: Setting::NotSet,
|
headers: Setting::NotSet,
|
||||||
distribution: Setting::some_or_not_set(distribution),
|
distribution: Setting::some_or_not_set(distribution),
|
||||||
|
binary_quantized: Setting::some_or_not_set(quantized),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
|
super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
|
||||||
dimensions,
|
dimensions,
|
||||||
@ -564,6 +632,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
headers: Setting::NotSet,
|
headers: Setting::NotSet,
|
||||||
distribution: Setting::some_or_not_set(distribution),
|
distribution: Setting::some_or_not_set(distribution),
|
||||||
|
binary_quantized: Setting::some_or_not_set(quantized),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
|
super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
|
||||||
api_key,
|
api_key,
|
||||||
@ -586,6 +655,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
response: Setting::Set(response),
|
response: Setting::Set(response),
|
||||||
distribution: Setting::some_or_not_set(distribution),
|
distribution: Setting::some_or_not_set(distribution),
|
||||||
headers: Setting::Set(headers),
|
headers: Setting::Set(headers),
|
||||||
|
binary_quantized: Setting::some_or_not_set(quantized),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -607,8 +677,11 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
|
|||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
|
binary_quantized,
|
||||||
} = value;
|
} = value;
|
||||||
|
|
||||||
|
this.quantized = binary_quantized.set();
|
||||||
|
|
||||||
if let Some(source) = source.set() {
|
if let Some(source) = source.set() {
|
||||||
match source {
|
match source {
|
||||||
EmbedderSource::OpenAi => {
|
EmbedderSource::OpenAi => {
|
||||||
|
Loading…
Reference in New Issue
Block a user