mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-02-20 17:28:28 +01:00
Merge #5351
5351: Bring back v1.13.0 changes into main r=irevoire a=Kerollmops This PR brings back the changes made in v1.13 into the main branch. Co-authored-by: ManyTheFish <many@meilisearch.com> Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: Louis Dureuil <louis@meilisearch.com> Co-authored-by: Clémentine <clementine@meilisearch.com> Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com> Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
commit
d86035ff43
2
.github/workflows/sdks-tests.yml
vendored
2
.github/workflows/sdks-tests.yml
vendored
@ -52,7 +52,7 @@ jobs:
|
||||
- name: Setup .NET Core
|
||||
uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: "6.0.x"
|
||||
dotnet-version: "8.0.x"
|
||||
- name: Install dependencies
|
||||
run: dotnet restore
|
||||
- name: Build
|
||||
|
@ -10,8 +10,10 @@ dump
|
||||
├── instance-uid.uuid
|
||||
├── keys.jsonl
|
||||
├── metadata.json
|
||||
└── tasks
|
||||
├── update_files
|
||||
│ └── [task_id].jsonl
|
||||
├── tasks
|
||||
│ ├── update_files
|
||||
│ │ └── [task_id].jsonl
|
||||
│ └── queue.jsonl
|
||||
└── batches
|
||||
└── queue.jsonl
|
||||
```
|
||||
```
|
||||
|
@ -228,14 +228,16 @@ pub(crate) mod test {
|
||||
|
||||
use big_s::S;
|
||||
use maplit::{btreemap, btreeset};
|
||||
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
|
||||
use meilisearch_types::facet_values_sort::FacetValuesSort;
|
||||
use meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::keys::{Action, Key};
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
|
||||
use meilisearch_types::tasks::{Details, Status};
|
||||
use meilisearch_types::task_view::DetailsView;
|
||||
use meilisearch_types::tasks::{Details, Kind, Status};
|
||||
use serde_json::{json, Map, Value};
|
||||
use time::macros::datetime;
|
||||
use uuid::Uuid;
|
||||
@ -305,6 +307,30 @@ pub(crate) mod test {
|
||||
settings.check()
|
||||
}
|
||||
|
||||
pub fn create_test_batches() -> Vec<Batch> {
|
||||
vec![Batch {
|
||||
uid: 0,
|
||||
details: DetailsView {
|
||||
received_documents: Some(12),
|
||||
indexed_documents: Some(Some(10)),
|
||||
..DetailsView::default()
|
||||
},
|
||||
progress: None,
|
||||
stats: BatchStats {
|
||||
total_nb_tasks: 1,
|
||||
status: maplit::btreemap! { Status::Succeeded => 1 },
|
||||
types: maplit::btreemap! { Kind::DocumentAdditionOrUpdate => 1 },
|
||||
index_uids: maplit::btreemap! { "doggo".to_string() => 1 },
|
||||
},
|
||||
enqueued_at: Some(BatchEnqueuedAt {
|
||||
earliest: datetime!(2022-11-11 0:00 UTC),
|
||||
oldest: datetime!(2022-11-11 0:00 UTC),
|
||||
}),
|
||||
started_at: datetime!(2022-11-20 0:00 UTC),
|
||||
finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
|
||||
}]
|
||||
}
|
||||
|
||||
pub fn create_test_tasks() -> Vec<(TaskDump, Option<Vec<Document>>)> {
|
||||
vec![
|
||||
(
|
||||
@ -427,6 +453,15 @@ pub(crate) mod test {
|
||||
index.flush().unwrap();
|
||||
index.settings(&settings).unwrap();
|
||||
|
||||
// ========== pushing the batch queue
|
||||
let batches = create_test_batches();
|
||||
|
||||
let mut batch_queue = dump.create_batches_queue().unwrap();
|
||||
for batch in &batches {
|
||||
batch_queue.push_batch(batch).unwrap();
|
||||
}
|
||||
batch_queue.flush().unwrap();
|
||||
|
||||
// ========== pushing the task queue
|
||||
let tasks = create_test_tasks();
|
||||
|
||||
@ -455,6 +490,10 @@ pub(crate) mod test {
|
||||
|
||||
dump.create_experimental_features(features).unwrap();
|
||||
|
||||
// ========== network
|
||||
let network = create_test_network();
|
||||
dump.create_network(network).unwrap();
|
||||
|
||||
// create the dump
|
||||
let mut file = tempfile::tempfile().unwrap();
|
||||
dump.persist_to(&mut file).unwrap();
|
||||
@ -467,6 +506,13 @@ pub(crate) mod test {
|
||||
RuntimeTogglableFeatures::default()
|
||||
}
|
||||
|
||||
fn create_test_network() -> Network {
|
||||
Network {
|
||||
local: Some("myself".to_string()),
|
||||
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()) }},
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_creating_and_read_dump() {
|
||||
let mut file = create_test_dump();
|
||||
@ -515,5 +561,9 @@ pub(crate) mod test {
|
||||
// ==== checking the features
|
||||
let expected = create_test_features();
|
||||
assert_eq!(dump.features().unwrap().unwrap(), expected);
|
||||
|
||||
// ==== checking the network
|
||||
let expected = create_test_network();
|
||||
assert_eq!(&expected, dump.network().unwrap().unwrap());
|
||||
}
|
||||
}
|
||||
|
@ -196,6 +196,10 @@ impl CompatV5ToV6 {
|
||||
pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Result<Option<&v6::Network>> {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
pub enum CompatIndexV5ToV6 {
|
||||
|
@ -23,6 +23,7 @@ mod v6;
|
||||
pub type Document = serde_json::Map<String, serde_json::Value>;
|
||||
pub type UpdateFile = dyn Iterator<Item = Result<Document>>;
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum DumpReader {
|
||||
Current(V6Reader),
|
||||
Compat(CompatV5ToV6),
|
||||
@ -101,6 +102,13 @@ impl DumpReader {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn batches(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Batch>> + '_>> {
|
||||
match self {
|
||||
DumpReader::Current(current) => Ok(current.batches()),
|
||||
DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn keys(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Key>> + '_>> {
|
||||
match self {
|
||||
DumpReader::Current(current) => Ok(current.keys()),
|
||||
@ -114,6 +122,13 @@ impl DumpReader {
|
||||
DumpReader::Compat(compat) => compat.features(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Result<Option<&v6::Network>> {
|
||||
match self {
|
||||
DumpReader::Current(current) => Ok(current.network()),
|
||||
DumpReader::Compat(compat) => compat.network(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<V6Reader> for DumpReader {
|
||||
@ -219,6 +234,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
|
||||
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@ -328,6 +347,7 @@ pub(crate) mod test {
|
||||
}
|
||||
|
||||
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
|
||||
assert_eq!(dump.network().unwrap(), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -339,6 +359,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
|
||||
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@ -373,6 +397,27 @@ pub(crate) mod test {
|
||||
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn import_dump_v6_network() {
|
||||
let dump = File::open("tests/assets/v6-with-network.dump").unwrap();
|
||||
let dump = DumpReader::open(dump).unwrap();
|
||||
|
||||
// top level infos
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2025-01-29 15:45:32.738676 +00:00:00");
|
||||
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
|
||||
|
||||
// network
|
||||
|
||||
let network = dump.network().unwrap().unwrap();
|
||||
insta::assert_snapshot!(network.local.as_ref().unwrap(), @"ms-0");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().url, @"http://localhost:7700");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().search_api_key.is_none(), @"true");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().url, @"http://localhost:7701");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().search_api_key.is_none(), @"true");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().url, @"http://ms-5679.example.meilisearch.io");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().search_api_key.as_ref().unwrap(), @"foo");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn import_dump_v5() {
|
||||
let dump = File::open("tests/assets/v5.dump").unwrap();
|
||||
@ -382,6 +427,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
|
||||
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@ -462,6 +511,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
|
||||
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@ -539,6 +592,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
|
||||
assert_eq!(dump.instance_uid().unwrap(), None);
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@ -632,6 +689,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
|
||||
assert_eq!(dump.instance_uid().unwrap(), None);
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@ -725,6 +786,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
|
||||
assert_eq!(dump.instance_uid().unwrap(), None);
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@ -801,6 +866,10 @@ pub(crate) mod test {
|
||||
assert_eq!(dump.date(), None);
|
||||
assert_eq!(dump.instance_uid().unwrap(), None);
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
|
@ -18,8 +18,10 @@ pub type Checked = meilisearch_types::settings::Checked;
|
||||
pub type Unchecked = meilisearch_types::settings::Unchecked;
|
||||
|
||||
pub type Task = crate::TaskDump;
|
||||
pub type Batch = meilisearch_types::batches::Batch;
|
||||
pub type Key = meilisearch_types::keys::Key;
|
||||
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
pub type Network = meilisearch_types::features::Network;
|
||||
|
||||
// ===== Other types to clarify the code of the compat module
|
||||
// everything related to the tasks
|
||||
@ -48,8 +50,10 @@ pub struct V6Reader {
|
||||
instance_uid: Option<Uuid>,
|
||||
metadata: Metadata,
|
||||
tasks: BufReader<File>,
|
||||
batches: Option<BufReader<File>>,
|
||||
keys: BufReader<File>,
|
||||
features: Option<RuntimeTogglableFeatures>,
|
||||
network: Option<Network>,
|
||||
}
|
||||
|
||||
impl V6Reader {
|
||||
@ -77,13 +81,38 @@ impl V6Reader {
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let batches = match File::open(dump.path().join("batches").join("queue.jsonl")) {
|
||||
Ok(file) => Some(BufReader::new(file)),
|
||||
// The batch file was only introduced during the v1.13, anything prior to that won't have batches
|
||||
Err(err) if err.kind() == ErrorKind::NotFound => None,
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
|
||||
let network_file = match fs::read(dump.path().join("network.json")) {
|
||||
Ok(network_file) => Some(network_file),
|
||||
Err(error) => match error.kind() {
|
||||
// Allows the file to be missing, this will only result in all experimental features disabled.
|
||||
ErrorKind::NotFound => {
|
||||
debug!("`network.json` not found in dump");
|
||||
None
|
||||
}
|
||||
_ => return Err(error.into()),
|
||||
},
|
||||
};
|
||||
let network = if let Some(network_file) = network_file {
|
||||
Some(serde_json::from_reader(&*network_file)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(V6Reader {
|
||||
metadata: serde_json::from_reader(&*meta_file)?,
|
||||
instance_uid,
|
||||
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
|
||||
batches,
|
||||
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
|
||||
features,
|
||||
network,
|
||||
dump,
|
||||
})
|
||||
}
|
||||
@ -124,7 +153,7 @@ impl V6Reader {
|
||||
&mut self,
|
||||
) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> {
|
||||
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
|
||||
let task: Task = serde_json::from_str(&line?).unwrap();
|
||||
let task: Task = serde_json::from_str(&line?)?;
|
||||
|
||||
let update_file_path = self
|
||||
.dump
|
||||
@ -136,8 +165,7 @@ impl V6Reader {
|
||||
if update_file_path.exists() {
|
||||
Ok((
|
||||
task,
|
||||
Some(Box::new(UpdateFile::new(&update_file_path).unwrap())
|
||||
as Box<super::UpdateFile>),
|
||||
Some(Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>),
|
||||
))
|
||||
} else {
|
||||
Ok((task, None))
|
||||
@ -145,6 +173,16 @@ impl V6Reader {
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn batches(&mut self) -> Box<dyn Iterator<Item = Result<Batch>> + '_> {
|
||||
match self.batches.as_mut() {
|
||||
Some(batches) => Box::new((batches).lines().map(|line| -> Result<_> {
|
||||
let batch = serde_json::from_str(&line?)?;
|
||||
Ok(batch)
|
||||
})),
|
||||
None => Box::new(std::iter::empty()) as Box<dyn Iterator<Item = Result<Batch>> + '_>,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> {
|
||||
Box::new(
|
||||
(&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
|
||||
@ -154,6 +192,10 @@ impl V6Reader {
|
||||
pub fn features(&self) -> Option<RuntimeTogglableFeatures> {
|
||||
self.features
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Option<&Network> {
|
||||
self.network.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct UpdateFile {
|
||||
|
@ -4,7 +4,8 @@ use std::path::PathBuf;
|
||||
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::features::{Network, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::keys::Key;
|
||||
use meilisearch_types::settings::{Checked, Settings};
|
||||
use serde_json::{Map, Value};
|
||||
@ -54,6 +55,10 @@ impl DumpWriter {
|
||||
TaskWriter::new(self.dir.path().join("tasks"))
|
||||
}
|
||||
|
||||
pub fn create_batches_queue(&self) -> Result<BatchWriter> {
|
||||
BatchWriter::new(self.dir.path().join("batches"))
|
||||
}
|
||||
|
||||
pub fn create_experimental_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
|
||||
Ok(std::fs::write(
|
||||
self.dir.path().join("experimental-features.json"),
|
||||
@ -61,6 +66,10 @@ impl DumpWriter {
|
||||
)?)
|
||||
}
|
||||
|
||||
pub fn create_network(&self, network: Network) -> Result<()> {
|
||||
Ok(std::fs::write(self.dir.path().join("network.json"), serde_json::to_string(&network)?)?)
|
||||
}
|
||||
|
||||
pub fn persist_to(self, mut writer: impl Write) -> Result<()> {
|
||||
let gz_encoder = GzEncoder::new(&mut writer, Compression::default());
|
||||
let mut tar_encoder = tar::Builder::new(gz_encoder);
|
||||
@ -84,7 +93,7 @@ impl KeyWriter {
|
||||
}
|
||||
|
||||
pub fn push_key(&mut self, key: &Key) -> Result<()> {
|
||||
self.keys.write_all(&serde_json::to_vec(key)?)?;
|
||||
serde_json::to_writer(&mut self.keys, &key)?;
|
||||
self.keys.write_all(b"\n")?;
|
||||
Ok(())
|
||||
}
|
||||
@ -114,7 +123,7 @@ impl TaskWriter {
|
||||
/// Pushes tasks in the dump.
|
||||
/// If the tasks has an associated `update_file` it'll use the `task_id` as its name.
|
||||
pub fn push_task(&mut self, task: &TaskDump) -> Result<UpdateFile> {
|
||||
self.queue.write_all(&serde_json::to_vec(task)?)?;
|
||||
serde_json::to_writer(&mut self.queue, &task)?;
|
||||
self.queue.write_all(b"\n")?;
|
||||
|
||||
Ok(UpdateFile::new(self.update_files.join(format!("{}.jsonl", task.uid))))
|
||||
@ -126,6 +135,30 @@ impl TaskWriter {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BatchWriter {
|
||||
queue: BufWriter<File>,
|
||||
}
|
||||
|
||||
impl BatchWriter {
|
||||
pub(crate) fn new(path: PathBuf) -> Result<Self> {
|
||||
std::fs::create_dir(&path)?;
|
||||
let queue = File::create(path.join("queue.jsonl"))?;
|
||||
Ok(BatchWriter { queue: BufWriter::new(queue) })
|
||||
}
|
||||
|
||||
/// Pushes batches in the dump.
|
||||
pub fn push_batch(&mut self, batch: &Batch) -> Result<()> {
|
||||
serde_json::to_writer(&mut self.queue, &batch)?;
|
||||
self.queue.write_all(b"\n")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn flush(mut self) -> Result<()> {
|
||||
self.queue.flush()?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct UpdateFile {
|
||||
path: PathBuf,
|
||||
writer: Option<BufWriter<File>>,
|
||||
@ -137,8 +170,8 @@ impl UpdateFile {
|
||||
}
|
||||
|
||||
pub fn push_document(&mut self, document: &Document) -> Result<()> {
|
||||
if let Some(writer) = self.writer.as_mut() {
|
||||
writer.write_all(&serde_json::to_vec(document)?)?;
|
||||
if let Some(mut writer) = self.writer.as_mut() {
|
||||
serde_json::to_writer(&mut writer, &document)?;
|
||||
writer.write_all(b"\n")?;
|
||||
} else {
|
||||
let file = File::create(&self.path).unwrap();
|
||||
@ -205,8 +238,8 @@ pub(crate) mod test {
|
||||
use super::*;
|
||||
use crate::reader::Document;
|
||||
use crate::test::{
|
||||
create_test_api_keys, create_test_documents, create_test_dump, create_test_instance_uid,
|
||||
create_test_settings, create_test_tasks,
|
||||
create_test_api_keys, create_test_batches, create_test_documents, create_test_dump,
|
||||
create_test_instance_uid, create_test_settings, create_test_tasks,
|
||||
};
|
||||
|
||||
fn create_directory_hierarchy(dir: &Path) -> String {
|
||||
@ -281,8 +314,10 @@ pub(crate) mod test {
|
||||
let dump_path = dump.path();
|
||||
|
||||
// ==== checking global file hierarchy (we want to be sure there isn't too many files or too few)
|
||||
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r###"
|
||||
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r"
|
||||
.
|
||||
├---- batches/
|
||||
│ └---- queue.jsonl
|
||||
├---- indexes/
|
||||
│ └---- doggos/
|
||||
│ │ ├---- documents.jsonl
|
||||
@ -295,8 +330,9 @@ pub(crate) mod test {
|
||||
├---- experimental-features.json
|
||||
├---- instance_uid.uuid
|
||||
├---- keys.jsonl
|
||||
└---- metadata.json
|
||||
"###);
|
||||
├---- metadata.json
|
||||
└---- network.json
|
||||
");
|
||||
|
||||
// ==== checking the top level infos
|
||||
let metadata = fs::read_to_string(dump_path.join("metadata.json")).unwrap();
|
||||
@ -349,6 +385,16 @@ pub(crate) mod test {
|
||||
}
|
||||
}
|
||||
|
||||
// ==== checking the batch queue
|
||||
let batches_queue = fs::read_to_string(dump_path.join("batches/queue.jsonl")).unwrap();
|
||||
for (batch, expected) in batches_queue.lines().zip(create_test_batches()) {
|
||||
let mut batch = serde_json::from_str::<Batch>(batch).unwrap();
|
||||
if batch.details.settings == Some(Box::new(Settings::<Unchecked>::default())) {
|
||||
batch.details.settings = None;
|
||||
}
|
||||
assert_eq!(batch, expected, "{batch:#?}{expected:#?}");
|
||||
}
|
||||
|
||||
// ==== checking the keys
|
||||
let keys = fs::read_to_string(dump_path.join("keys.jsonl")).unwrap();
|
||||
for (key, expected) in keys.lines().zip(create_test_api_keys()) {
|
||||
|
BIN
crates/dump/tests/assets/v6-with-network.dump
Normal file
BIN
crates/dump/tests/assets/v6-with-network.dump
Normal file
Binary file not shown.
@ -2,6 +2,7 @@ use std::collections::HashMap;
|
||||
use std::io;
|
||||
|
||||
use dump::{KindDump, TaskDump, UpdateFile};
|
||||
use meilisearch_types::batches::{Batch, BatchId};
|
||||
use meilisearch_types::heed::RwTxn;
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
@ -14,9 +15,15 @@ pub struct Dump<'a> {
|
||||
index_scheduler: &'a IndexScheduler,
|
||||
wtxn: RwTxn<'a>,
|
||||
|
||||
batch_to_task_mapping: HashMap<BatchId, RoaringBitmap>,
|
||||
|
||||
indexes: HashMap<String, RoaringBitmap>,
|
||||
statuses: HashMap<Status, RoaringBitmap>,
|
||||
kinds: HashMap<Kind, RoaringBitmap>,
|
||||
|
||||
batch_indexes: HashMap<String, RoaringBitmap>,
|
||||
batch_statuses: HashMap<Status, RoaringBitmap>,
|
||||
batch_kinds: HashMap<Kind, RoaringBitmap>,
|
||||
}
|
||||
|
||||
impl<'a> Dump<'a> {
|
||||
@ -27,12 +34,72 @@ impl<'a> Dump<'a> {
|
||||
Ok(Dump {
|
||||
index_scheduler,
|
||||
wtxn,
|
||||
batch_to_task_mapping: HashMap::new(),
|
||||
indexes: HashMap::new(),
|
||||
statuses: HashMap::new(),
|
||||
kinds: HashMap::new(),
|
||||
batch_indexes: HashMap::new(),
|
||||
batch_statuses: HashMap::new(),
|
||||
batch_kinds: HashMap::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Register a new batch coming from a dump in the scheduler.
|
||||
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
|
||||
pub fn register_dumped_batch(&mut self, batch: Batch) -> Result<()> {
|
||||
self.index_scheduler.queue.batches.all_batches.put(&mut self.wtxn, &batch.uid, &batch)?;
|
||||
if let Some(enqueued_at) = batch.enqueued_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.batches.enqueued_at,
|
||||
enqueued_at.earliest,
|
||||
batch.uid,
|
||||
)?;
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.batches.enqueued_at,
|
||||
enqueued_at.oldest,
|
||||
batch.uid,
|
||||
)?;
|
||||
}
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.batches.started_at,
|
||||
batch.started_at,
|
||||
batch.uid,
|
||||
)?;
|
||||
if let Some(finished_at) = batch.finished_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.batches.finished_at,
|
||||
finished_at,
|
||||
batch.uid,
|
||||
)?;
|
||||
}
|
||||
|
||||
for index in batch.stats.index_uids.keys() {
|
||||
match self.batch_indexes.get_mut(index) {
|
||||
Some(bitmap) => {
|
||||
bitmap.insert(batch.uid);
|
||||
}
|
||||
None => {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(batch.uid);
|
||||
self.batch_indexes.insert(index.to_string(), bitmap);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
for status in batch.stats.status.keys() {
|
||||
self.batch_statuses.entry(*status).or_default().insert(batch.uid);
|
||||
}
|
||||
for kind in batch.stats.types.keys() {
|
||||
self.batch_kinds.entry(*kind).or_default().insert(batch.uid);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Register a new task coming from a dump in the scheduler.
|
||||
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
|
||||
pub fn register_dumped_task(
|
||||
@ -149,6 +216,9 @@ impl<'a> Dump<'a> {
|
||||
};
|
||||
|
||||
self.index_scheduler.queue.tasks.all_tasks.put(&mut self.wtxn, &task.uid, &task)?;
|
||||
if let Some(batch_id) = task.batch_uid {
|
||||
self.batch_to_task_mapping.entry(batch_id).or_default().insert(task.uid);
|
||||
}
|
||||
|
||||
for index in task.indexes() {
|
||||
match self.indexes.get_mut(index) {
|
||||
@ -198,6 +268,14 @@ impl<'a> Dump<'a> {
|
||||
|
||||
/// Commit all the changes and exit the importing dump state
|
||||
pub fn finish(mut self) -> Result<()> {
|
||||
for (batch_id, task_ids) in self.batch_to_task_mapping {
|
||||
self.index_scheduler.queue.batch_to_tasks_mapping.put(
|
||||
&mut self.wtxn,
|
||||
&batch_id,
|
||||
&task_ids,
|
||||
)?;
|
||||
}
|
||||
|
||||
for (index, bitmap) in self.indexes {
|
||||
self.index_scheduler.queue.tasks.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
|
||||
}
|
||||
@ -208,6 +286,16 @@ impl<'a> Dump<'a> {
|
||||
self.index_scheduler.queue.tasks.put_kind(&mut self.wtxn, kind, &bitmap)?;
|
||||
}
|
||||
|
||||
for (index, bitmap) in self.batch_indexes {
|
||||
self.index_scheduler.queue.batches.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
|
||||
}
|
||||
for (status, bitmap) in self.batch_statuses {
|
||||
self.index_scheduler.queue.batches.put_status(&mut self.wtxn, status, &bitmap)?;
|
||||
}
|
||||
for (kind, bitmap) in self.batch_kinds {
|
||||
self.index_scheduler.queue.batches.put_kind(&mut self.wtxn, kind, &bitmap)?;
|
||||
}
|
||||
|
||||
self.wtxn.commit()?;
|
||||
self.index_scheduler.scheduler.wake_up.signal();
|
||||
|
||||
|
@ -109,6 +109,8 @@ pub enum Error {
|
||||
InvalidIndexUid { index_uid: String },
|
||||
#[error("Task `{0}` not found.")]
|
||||
TaskNotFound(TaskId),
|
||||
#[error("Task `{0}` does not contain any documents. Only `documentAdditionOrUpdate` tasks with the statuses `enqueued` or `processing` contain documents")]
|
||||
TaskFileNotFound(TaskId),
|
||||
#[error("Batch `{0}` not found.")]
|
||||
BatchNotFound(BatchId),
|
||||
#[error("Query parameters to filter the tasks to delete are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
|
||||
@ -127,8 +129,8 @@ pub enum Error {
|
||||
_ => format!("{error}")
|
||||
})]
|
||||
Milli { error: milli::Error, index_uid: Option<String> },
|
||||
#[error("An unexpected crash occurred when processing the task.")]
|
||||
ProcessBatchPanicked,
|
||||
#[error("An unexpected crash occurred when processing the task: {0}")]
|
||||
ProcessBatchPanicked(String),
|
||||
#[error(transparent)]
|
||||
FileStore(#[from] file_store::Error),
|
||||
#[error(transparent)]
|
||||
@ -189,6 +191,7 @@ impl Error {
|
||||
| Error::InvalidTaskCanceledBy { .. }
|
||||
| Error::InvalidIndexUid { .. }
|
||||
| Error::TaskNotFound(_)
|
||||
| Error::TaskFileNotFound(_)
|
||||
| Error::BatchNotFound(_)
|
||||
| Error::TaskDeletionWithEmptyQuery
|
||||
| Error::TaskCancelationWithEmptyQuery
|
||||
@ -196,7 +199,7 @@ impl Error {
|
||||
| Error::Dump(_)
|
||||
| Error::Heed(_)
|
||||
| Error::Milli { .. }
|
||||
| Error::ProcessBatchPanicked
|
||||
| Error::ProcessBatchPanicked(_)
|
||||
| Error::FileStore(_)
|
||||
| Error::IoError(_)
|
||||
| Error::Persist(_)
|
||||
@ -250,6 +253,7 @@ impl ErrorCode for Error {
|
||||
Error::InvalidTaskCanceledBy { .. } => Code::InvalidTaskCanceledBy,
|
||||
Error::InvalidIndexUid { .. } => Code::InvalidIndexUid,
|
||||
Error::TaskNotFound(_) => Code::TaskNotFound,
|
||||
Error::TaskFileNotFound(_) => Code::TaskFileNotFound,
|
||||
Error::BatchNotFound(_) => Code::BatchNotFound,
|
||||
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
|
||||
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
|
||||
@ -257,7 +261,7 @@ impl ErrorCode for Error {
|
||||
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
|
||||
Error::Dump(e) => e.error_code(),
|
||||
Error::Milli { error, .. } => error.error_code(),
|
||||
Error::ProcessBatchPanicked => Code::Internal,
|
||||
Error::ProcessBatchPanicked(_) => Code::Internal,
|
||||
Error::Heed(e) => e.error_code(),
|
||||
Error::HeedTransaction(e) => e.error_code(),
|
||||
Error::FileStore(e) => e.error_code(),
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RwTxn};
|
||||
|
||||
@ -14,10 +14,16 @@ mod db_name {
|
||||
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
}
|
||||
|
||||
mod db_keys {
|
||||
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
pub const NETWORK: &str = "network";
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct FeatureData {
|
||||
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
|
||||
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
|
||||
network: Arc<RwLock<Network>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
@ -86,6 +92,32 @@ impl RoFeatures {
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_network(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.network {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action,
|
||||
feature: "network",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/805",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_get_task_documents_route(&self) -> Result<()> {
|
||||
if self.runtime.get_task_documents_route {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action: "Getting the documents of an enqueued task",
|
||||
feature: "get task documents route",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/808",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FeatureData {
|
||||
@ -102,7 +134,7 @@ impl FeatureData {
|
||||
env.create_database(wtxn, Some(db_name::EXPERIMENTAL_FEATURES))?;
|
||||
|
||||
let persisted_features: RuntimeTogglableFeatures =
|
||||
runtime_features_db.get(wtxn, db_name::EXPERIMENTAL_FEATURES)?.unwrap_or_default();
|
||||
runtime_features_db.get(wtxn, db_keys::EXPERIMENTAL_FEATURES)?.unwrap_or_default();
|
||||
let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
|
||||
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
|
||||
metrics: metrics || persisted_features.metrics,
|
||||
@ -111,7 +143,14 @@ impl FeatureData {
|
||||
..persisted_features
|
||||
}));
|
||||
|
||||
Ok(Self { persisted: runtime_features_db, runtime })
|
||||
let network_db = runtime_features_db.remap_data_type::<SerdeJson<Network>>();
|
||||
let network: Network = network_db.get(wtxn, db_keys::NETWORK)?.unwrap_or_default();
|
||||
|
||||
Ok(Self {
|
||||
persisted: runtime_features_db,
|
||||
runtime,
|
||||
network: Arc::new(RwLock::new(network)),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn put_runtime_features(
|
||||
@ -119,7 +158,7 @@ impl FeatureData {
|
||||
mut wtxn: RwTxn,
|
||||
features: RuntimeTogglableFeatures,
|
||||
) -> Result<()> {
|
||||
self.persisted.put(&mut wtxn, db_name::EXPERIMENTAL_FEATURES, &features)?;
|
||||
self.persisted.put(&mut wtxn, db_keys::EXPERIMENTAL_FEATURES, &features)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
// safe to unwrap, the lock will only fail if:
|
||||
@ -140,4 +179,21 @@ impl FeatureData {
|
||||
pub fn features(&self) -> RoFeatures {
|
||||
RoFeatures::new(self)
|
||||
}
|
||||
|
||||
pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> {
|
||||
self.persisted.remap_data_type::<SerdeJson<Network>>().put(
|
||||
&mut wtxn,
|
||||
db_keys::NETWORK,
|
||||
&new_network,
|
||||
)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
let mut network = self.network.write().unwrap();
|
||||
*network = new_network;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Network {
|
||||
Network::clone(&*self.network.read().unwrap())
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::env::VarError;
|
||||
use std::path::Path;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
|
||||
use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
|
||||
@ -304,7 +306,18 @@ fn create_or_open_index(
|
||||
) -> Result<Index> {
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(clamp_to_page_size(map_size));
|
||||
options.max_readers(1024);
|
||||
|
||||
// You can find more details about this experimental
|
||||
// environment variable on the following GitHub discussion:
|
||||
// <https://github.com/orgs/meilisearch/discussions/806>
|
||||
let max_readers = match std::env::var("MEILI_EXPERIMENTAL_INDEX_MAX_READERS") {
|
||||
Ok(value) => u32::from_str(&value).unwrap(),
|
||||
Err(VarError::NotPresent) => 1024,
|
||||
Err(VarError::NotUnicode(value)) => panic!(
|
||||
"Invalid unicode for the `MEILI_EXPERIMENTAL_INDEX_MAX_READERS` env var: {value:?}"
|
||||
),
|
||||
};
|
||||
options.max_readers(max_readers);
|
||||
if enable_mdb_writemap {
|
||||
unsafe { options.flags(EnvFlags::WRITE_MAP) };
|
||||
}
|
||||
|
@ -106,6 +106,12 @@ pub struct IndexStats {
|
||||
/// are not returned to the disk after a deletion, this number is typically larger than
|
||||
/// `used_database_size` that only includes the size of the used pages.
|
||||
pub database_size: u64,
|
||||
/// Number of embeddings in the index.
|
||||
/// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
|
||||
pub number_of_embeddings: Option<u64>,
|
||||
/// Number of embedded documents in the index.
|
||||
/// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
|
||||
pub number_of_embedded_documents: Option<u64>,
|
||||
/// Size taken by the used pages of the index' DB, in bytes.
|
||||
///
|
||||
/// As the DB backend does not return to the disk the pages that are not currently used by the DB,
|
||||
@ -130,8 +136,11 @@ impl IndexStats {
|
||||
///
|
||||
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
|
||||
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
|
||||
let arroy_stats = index.arroy_stats(rtxn)?;
|
||||
Ok(IndexStats {
|
||||
number_of_documents: index.number_of_documents(rtxn)?,
|
||||
number_of_embeddings: Some(arroy_stats.number_of_embeddings),
|
||||
number_of_embedded_documents: Some(arroy_stats.documents.len()),
|
||||
database_size: index.on_disk_size()?,
|
||||
used_database_size: index.used_size()?,
|
||||
primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()),
|
||||
|
@ -1,7 +1,7 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::fmt::Write;
|
||||
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::batches::{Batch, BatchEnqueuedAt};
|
||||
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, RoTxn};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
@ -341,10 +341,14 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec
|
||||
|
||||
pub fn snapshot_batch(batch: &Batch) -> String {
|
||||
let mut snap = String::new();
|
||||
let Batch { uid, details, stats, started_at, finished_at, progress: _ } = batch;
|
||||
let Batch { uid, details, stats, started_at, finished_at, progress: _, enqueued_at } = batch;
|
||||
if let Some(finished_at) = finished_at {
|
||||
assert!(finished_at > started_at);
|
||||
}
|
||||
let BatchEnqueuedAt { earliest, oldest } = enqueued_at.unwrap();
|
||||
assert!(*started_at > earliest);
|
||||
assert!(earliest >= oldest);
|
||||
|
||||
snap.push('{');
|
||||
snap.push_str(&format!("uid: {uid}, "));
|
||||
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
|
||||
|
@ -33,7 +33,7 @@ mod test_utils;
|
||||
pub mod upgrade;
|
||||
mod utils;
|
||||
pub mod uuid_codec;
|
||||
mod versioning;
|
||||
pub mod versioning;
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
pub type TaskId = u32;
|
||||
@ -51,7 +51,7 @@ pub use features::RoFeatures;
|
||||
use flate2::bufread::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::heed::byteorder::BE;
|
||||
use meilisearch_types::heed::types::I128;
|
||||
use meilisearch_types::heed::{self, Env, RoTxn};
|
||||
@ -770,7 +770,16 @@ impl IndexScheduler {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// TODO: consider using a type alias or a struct embedder/template
|
||||
pub fn put_network(&self, network: Network) -> Result<()> {
|
||||
let wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
|
||||
self.features.put_network(wtxn, network)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Network {
|
||||
self.features.network()
|
||||
}
|
||||
|
||||
pub fn embedders(
|
||||
&self,
|
||||
index_uid: String,
|
||||
|
@ -96,6 +96,7 @@ make_enum_progress! {
|
||||
StartTheDumpCreation,
|
||||
DumpTheApiKeys,
|
||||
DumpTheTasks,
|
||||
DumpTheBatches,
|
||||
DumpTheIndexes,
|
||||
DumpTheExperimentalFeatures,
|
||||
CompressTheDump,
|
||||
|
@ -12,8 +12,8 @@ use time::OffsetDateTime;
|
||||
use super::{Query, Queue};
|
||||
use crate::processing::ProcessingTasks;
|
||||
use crate::utils::{
|
||||
insert_task_datetime, keep_ids_within_datetimes, map_bound, remove_task_datetime,
|
||||
ProcessingBatch,
|
||||
insert_task_datetime, keep_ids_within_datetimes, map_bound,
|
||||
remove_n_tasks_datetime_earlier_than, remove_task_datetime, ProcessingBatch,
|
||||
};
|
||||
use crate::{Error, Result, BEI128};
|
||||
|
||||
@ -181,6 +181,7 @@ impl BatchQueue {
|
||||
stats: batch.stats,
|
||||
started_at: batch.started_at,
|
||||
finished_at: batch.finished_at,
|
||||
enqueued_at: batch.enqueued_at,
|
||||
},
|
||||
)?;
|
||||
|
||||
@ -234,34 +235,25 @@ impl BatchQueue {
|
||||
// What we know, though, is that the task date is from before the enqueued_at, and max two timestamps have been written
|
||||
// to the DB per batches.
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
let started_at = old_batch.started_at.unix_timestamp_nanos();
|
||||
|
||||
// We have either one or two enqueued at to remove
|
||||
let mut exit = old_batch.stats.total_nb_tasks.clamp(0, 2);
|
||||
let mut iterator = self.enqueued_at.rev_iter_mut(wtxn)?;
|
||||
while let Some(entry) = iterator.next() {
|
||||
let (key, mut value) = entry?;
|
||||
if key > started_at {
|
||||
continue;
|
||||
}
|
||||
if value.remove(old_batch.uid) {
|
||||
exit = exit.saturating_sub(1);
|
||||
// Safe because the key and value are owned
|
||||
unsafe {
|
||||
iterator.put_current(&key, &value)?;
|
||||
}
|
||||
if exit == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if let Some(enqueued_at) = old_batch.enqueued_at {
|
||||
remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, old_batch.uid)?;
|
||||
remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, old_batch.uid)?;
|
||||
} else {
|
||||
// If we don't have the enqueued at in the batch it means the database comes from the v1.12
|
||||
// and we still need to find the date by scrolling the database
|
||||
remove_n_tasks_datetime_earlier_than(
|
||||
wtxn,
|
||||
self.enqueued_at,
|
||||
old_batch.started_at,
|
||||
old_batch.stats.total_nb_tasks.clamp(1, 2) as usize,
|
||||
old_batch.uid,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
if let Some(enqueued_at) = batch.oldest_enqueued_at {
|
||||
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?;
|
||||
}
|
||||
if let Some(enqueued_at) = batch.earliest_enqueued_at {
|
||||
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?;
|
||||
}
|
||||
// A finished batch MUST contains at least one task and have an enqueued_at
|
||||
let enqueued_at = batch.enqueued_at.as_ref().unwrap();
|
||||
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, batch.uid)?;
|
||||
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, batch.uid)?;
|
||||
|
||||
// Update the started at and finished at
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
|
@ -102,30 +102,33 @@ fn query_batches_simple() {
|
||||
.unwrap();
|
||||
assert_eq!(batches.len(), 1);
|
||||
batches[0].started_at = OffsetDateTime::UNIX_EPOCH;
|
||||
assert!(batches[0].enqueued_at.is_some());
|
||||
batches[0].enqueued_at = None;
|
||||
// Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689
|
||||
let batch = serde_json::to_string_pretty(&batches[0]).unwrap();
|
||||
snapshot!(batch, @r#"
|
||||
{
|
||||
"uid": 0,
|
||||
"details": {
|
||||
"primaryKey": "mouse"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
"status": {
|
||||
"processing": 1
|
||||
},
|
||||
"types": {
|
||||
"indexCreation": 1
|
||||
},
|
||||
"indexUids": {
|
||||
"catto": 1
|
||||
}
|
||||
},
|
||||
"startedAt": "1970-01-01T00:00:00Z",
|
||||
"finishedAt": null
|
||||
{
|
||||
"uid": 0,
|
||||
"details": {
|
||||
"primaryKey": "mouse"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
"status": {
|
||||
"processing": 1
|
||||
},
|
||||
"types": {
|
||||
"indexCreation": 1
|
||||
},
|
||||
"indexUids": {
|
||||
"catto": 1
|
||||
}
|
||||
"#);
|
||||
},
|
||||
"startedAt": "1970-01-01T00:00:00Z",
|
||||
"finishedAt": null,
|
||||
"enqueuedAt": null
|
||||
}
|
||||
"#);
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
|
@ -8,6 +8,7 @@ mod tasks_test;
|
||||
mod test;
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs::File as StdFile;
|
||||
use std::time::Duration;
|
||||
|
||||
use file_store::FileStore;
|
||||
@ -216,6 +217,11 @@ impl Queue {
|
||||
}
|
||||
}
|
||||
|
||||
/// Open and returns the task's content File.
|
||||
pub fn update_file(&self, uuid: Uuid) -> file_store::Result<StdFile> {
|
||||
self.file_store.get_update(uuid)
|
||||
}
|
||||
|
||||
/// Delete a file from the index scheduler.
|
||||
///
|
||||
/// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method.
|
||||
|
@ -326,7 +326,7 @@ fn test_auto_deletion_of_tasks() {
|
||||
fn test_task_queue_is_full() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
|
||||
// that's the minimum map size possible
|
||||
config.task_db_size = 1048576;
|
||||
config.task_db_size = 1048576 * 3;
|
||||
None
|
||||
});
|
||||
|
||||
|
@ -166,13 +166,41 @@ impl IndexScheduler {
|
||||
let processing_batch = &mut processing_batch;
|
||||
let progress = progress.clone();
|
||||
std::thread::scope(|s| {
|
||||
let p = progress.clone();
|
||||
let handle = std::thread::Builder::new()
|
||||
.name(String::from("batch-operation"))
|
||||
.spawn_scoped(s, move || {
|
||||
cloned_index_scheduler.process_batch(batch, processing_batch, progress)
|
||||
cloned_index_scheduler.process_batch(batch, processing_batch, p)
|
||||
})
|
||||
.unwrap();
|
||||
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
|
||||
|
||||
match handle.join() {
|
||||
Ok(ret) => {
|
||||
if ret.is_err() {
|
||||
if let Ok(progress_view) =
|
||||
serde_json::to_string(&progress.as_progress_view())
|
||||
{
|
||||
tracing::warn!("Batch failed while doing: {progress_view}")
|
||||
}
|
||||
}
|
||||
ret
|
||||
}
|
||||
Err(panic) => {
|
||||
if let Ok(progress_view) =
|
||||
serde_json::to_string(&progress.as_progress_view())
|
||||
{
|
||||
tracing::warn!("Batch failed while doing: {progress_view}")
|
||||
}
|
||||
let msg = match panic.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match panic.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
Err(Error::ProcessBatchPanicked(msg.to_string()))
|
||||
}
|
||||
}
|
||||
})
|
||||
};
|
||||
|
||||
|
@ -2,7 +2,7 @@ use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
use std::panic::{catch_unwind, AssertUnwindSafe};
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use meilisearch_types::batches::BatchId;
|
||||
use meilisearch_types::batches::{BatchEnqueuedAt, BatchId};
|
||||
use meilisearch_types::heed::{RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::milli::{self};
|
||||
@ -16,7 +16,10 @@ use crate::processing::{
|
||||
InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
|
||||
UpdateIndexProgress,
|
||||
};
|
||||
use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch};
|
||||
use crate::utils::{
|
||||
self, remove_n_tasks_datetime_earlier_than, remove_task_datetime, swap_index_uid_in_task,
|
||||
ProcessingBatch,
|
||||
};
|
||||
use crate::{Error, IndexScheduler, Result, TaskId};
|
||||
|
||||
impl IndexScheduler {
|
||||
@ -323,8 +326,17 @@ impl IndexScheduler {
|
||||
match ret {
|
||||
Ok(Ok(())) => (),
|
||||
Ok(Err(e)) => return Err(Error::DatabaseUpgrade(Box::new(e))),
|
||||
Err(_e) => {
|
||||
return Err(Error::DatabaseUpgrade(Box::new(Error::ProcessBatchPanicked)));
|
||||
Err(e) => {
|
||||
let msg = match e.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match e.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
return Err(Error::DatabaseUpgrade(Box::new(Error::ProcessBatchPanicked(
|
||||
msg.to_string(),
|
||||
))));
|
||||
}
|
||||
}
|
||||
|
||||
@ -418,7 +430,6 @@ impl IndexScheduler {
|
||||
to_delete_tasks -= &enqueued_tasks;
|
||||
|
||||
// 2. We now have a list of tasks to delete, delete them
|
||||
|
||||
let mut affected_indexes = HashSet::new();
|
||||
let mut affected_statuses = HashSet::new();
|
||||
let mut affected_kinds = HashSet::new();
|
||||
@ -515,9 +526,51 @@ impl IndexScheduler {
|
||||
tasks -= &to_delete_tasks;
|
||||
// We must remove the batch entirely
|
||||
if tasks.is_empty() {
|
||||
self.queue.batches.all_batches.delete(wtxn, &batch_id)?;
|
||||
self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?;
|
||||
if let Some(batch) = self.queue.batches.get_batch(wtxn, batch_id)? {
|
||||
if let Some(BatchEnqueuedAt { earliest, oldest }) = batch.enqueued_at {
|
||||
remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.batches.enqueued_at,
|
||||
earliest,
|
||||
batch_id,
|
||||
)?;
|
||||
remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.batches.enqueued_at,
|
||||
oldest,
|
||||
batch_id,
|
||||
)?;
|
||||
} else {
|
||||
// If we don't have the enqueued at in the batch it means the database comes from the v1.12
|
||||
// and we still need to find the date by scrolling the database
|
||||
remove_n_tasks_datetime_earlier_than(
|
||||
wtxn,
|
||||
self.queue.batches.enqueued_at,
|
||||
batch.started_at,
|
||||
batch.stats.total_nb_tasks.clamp(1, 2) as usize,
|
||||
batch_id,
|
||||
)?;
|
||||
}
|
||||
remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.batches.started_at,
|
||||
batch.started_at,
|
||||
batch_id,
|
||||
)?;
|
||||
if let Some(finished_at) = batch.finished_at {
|
||||
remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.batches.finished_at,
|
||||
finished_at,
|
||||
batch_id,
|
||||
)?;
|
||||
}
|
||||
|
||||
self.queue.batches.all_batches.delete(wtxn, &batch_id)?;
|
||||
self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Anyway, we must remove the batch from all its reverse indexes.
|
||||
// The only way to do that is to check
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs::File;
|
||||
use std::io::BufWriter;
|
||||
use std::sync::atomic::Ordering;
|
||||
@ -11,7 +12,9 @@ use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
|
||||
use time::macros::format_description;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::processing::{AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress};
|
||||
use crate::processing::{
|
||||
AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress,
|
||||
};
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
impl IndexScheduler {
|
||||
@ -102,7 +105,40 @@ impl IndexScheduler {
|
||||
}
|
||||
dump_tasks.flush()?;
|
||||
|
||||
// 3. Dump the indexes
|
||||
// 3. dump the batches
|
||||
progress.update_progress(DumpCreationProgress::DumpTheBatches);
|
||||
let mut dump_batches = dump.create_batches_queue()?;
|
||||
|
||||
let (atomic_batch_progress, update_batch_progress) =
|
||||
AtomicBatchStep::new(self.queue.batches.all_batches.len(&rtxn)? as u32);
|
||||
progress.update_progress(update_batch_progress);
|
||||
|
||||
for ret in self.queue.batches.all_batches.iter(&rtxn)? {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
let (_, mut b) = ret?;
|
||||
// In the case we're dumping ourselves we want to be marked as finished
|
||||
// to not loop over ourselves indefinitely.
|
||||
if b.uid == task.uid {
|
||||
let finished_at = OffsetDateTime::now_utc();
|
||||
|
||||
// We're going to fake the date because we don't know if everything is going to go well.
|
||||
// But we need to dump the task as finished and successful.
|
||||
// If something fail everything will be set appropriately in the end.
|
||||
let mut statuses = BTreeMap::new();
|
||||
statuses.insert(Status::Succeeded, b.stats.total_nb_tasks);
|
||||
b.stats.status = statuses;
|
||||
b.finished_at = Some(finished_at);
|
||||
}
|
||||
|
||||
dump_batches.push_batch(&b)?;
|
||||
atomic_batch_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
dump_batches.flush()?;
|
||||
|
||||
// 4. Dump the indexes
|
||||
progress.update_progress(DumpCreationProgress::DumpTheIndexes);
|
||||
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
|
||||
let mut count = 0;
|
||||
@ -142,7 +178,7 @@ impl IndexScheduler {
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
// 3.1. Dump the documents
|
||||
// 4.1. Dump the documents
|
||||
for ret in documents {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
@ -204,7 +240,7 @@ impl IndexScheduler {
|
||||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
// 3.2. Dump the settings
|
||||
// 4.2. Dump the settings
|
||||
let settings = meilisearch_types::settings::settings(
|
||||
index,
|
||||
&rtxn,
|
||||
@ -215,10 +251,12 @@ impl IndexScheduler {
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
// 4. Dump experimental feature settings
|
||||
// 5. Dump experimental feature settings
|
||||
progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
|
||||
let features = self.features().runtime_features();
|
||||
dump.create_experimental_features(features)?;
|
||||
let network = self.network();
|
||||
dump.create_network(network)?;
|
||||
|
||||
let dump_uid = started_at.format(format_description!(
|
||||
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
|
||||
|
@ -56,16 +56,13 @@ succeeded [1,]
|
||||
### Batches Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
@ -54,15 +54,12 @@ succeeded [1,]
|
||||
### Batches Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
@ -7,7 +7,7 @@ snapshot_kind: text
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "An unexpected crash occurred when processing the task.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "An unexpected crash occurred when processing the task: simulated panic", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
@ -87,7 +87,6 @@ doggo [2,3,]
|
||||
girafo [4,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
@ -95,7 +94,6 @@ girafo [4,]
|
||||
[timestamp] [5,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
@ -103,7 +101,6 @@ girafo [4,]
|
||||
[timestamp] [5,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
|
@ -903,7 +903,7 @@ fn create_and_list_index() {
|
||||
|
||||
index_scheduler.index("kefir").unwrap();
|
||||
let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap();
|
||||
snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r#"
|
||||
snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r###"
|
||||
[
|
||||
1,
|
||||
[
|
||||
@ -912,6 +912,8 @@ fn create_and_list_index() {
|
||||
{
|
||||
"number_of_documents": 0,
|
||||
"database_size": "[bytes]",
|
||||
"number_of_embeddings": 0,
|
||||
"number_of_embedded_documents": 0,
|
||||
"used_database_size": "[bytes]",
|
||||
"primary_key": null,
|
||||
"field_distribution": {},
|
||||
@ -921,5 +923,5 @@ fn create_and_list_index() {
|
||||
]
|
||||
]
|
||||
]
|
||||
"#);
|
||||
"###);
|
||||
}
|
||||
|
@ -6,8 +6,7 @@ use meili_snap::snapshot;
|
||||
use meilisearch_types::milli::obkv_to_json;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod::*;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::tasks::Kind;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent};
|
||||
|
||||
use crate::insta_snapshot::snapshot_index_scheduler;
|
||||
use crate::test_utils::Breakpoint::*;
|
||||
|
@ -3,7 +3,7 @@
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
use std::ops::Bound;
|
||||
|
||||
use meilisearch_types::batches::{Batch, BatchId, BatchStats};
|
||||
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats};
|
||||
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::CboRoaringBitmapCodec;
|
||||
use meilisearch_types::task_view::DetailsView;
|
||||
@ -30,8 +30,7 @@ pub struct ProcessingBatch {
|
||||
pub kinds: HashSet<Kind>,
|
||||
pub indexes: HashSet<String>,
|
||||
pub canceled_by: HashSet<TaskId>,
|
||||
pub oldest_enqueued_at: Option<OffsetDateTime>,
|
||||
pub earliest_enqueued_at: Option<OffsetDateTime>,
|
||||
pub enqueued_at: Option<BatchEnqueuedAt>,
|
||||
pub started_at: OffsetDateTime,
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
}
|
||||
@ -51,8 +50,7 @@ impl ProcessingBatch {
|
||||
kinds: HashSet::default(),
|
||||
indexes: HashSet::default(),
|
||||
canceled_by: HashSet::default(),
|
||||
oldest_enqueued_at: None,
|
||||
earliest_enqueued_at: None,
|
||||
enqueued_at: None,
|
||||
started_at: OffsetDateTime::now_utc(),
|
||||
finished_at: None,
|
||||
}
|
||||
@ -80,14 +78,18 @@ impl ProcessingBatch {
|
||||
if let Some(canceled_by) = task.canceled_by {
|
||||
self.canceled_by.insert(canceled_by);
|
||||
}
|
||||
self.oldest_enqueued_at =
|
||||
Some(self.oldest_enqueued_at.map_or(task.enqueued_at, |oldest_enqueued_at| {
|
||||
task.enqueued_at.min(oldest_enqueued_at)
|
||||
}));
|
||||
self.earliest_enqueued_at =
|
||||
Some(self.earliest_enqueued_at.map_or(task.enqueued_at, |earliest_enqueued_at| {
|
||||
task.enqueued_at.max(earliest_enqueued_at)
|
||||
}));
|
||||
match self.enqueued_at.as_mut() {
|
||||
Some(BatchEnqueuedAt { earliest, oldest }) => {
|
||||
*oldest = task.enqueued_at.min(*oldest);
|
||||
*earliest = task.enqueued_at.max(*earliest);
|
||||
}
|
||||
None => {
|
||||
self.enqueued_at = Some(BatchEnqueuedAt {
|
||||
earliest: task.enqueued_at,
|
||||
oldest: task.enqueued_at,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -138,6 +140,7 @@ impl ProcessingBatch {
|
||||
stats: self.stats.clone(),
|
||||
started_at: self.started_at,
|
||||
finished_at: self.finished_at,
|
||||
enqueued_at: self.enqueued_at,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -174,6 +177,33 @@ pub(crate) fn remove_task_datetime(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn remove_n_tasks_datetime_earlier_than(
|
||||
wtxn: &mut RwTxn,
|
||||
database: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
earlier_than: OffsetDateTime,
|
||||
mut count: usize,
|
||||
task_id: TaskId,
|
||||
) -> Result<()> {
|
||||
let earlier_than = earlier_than.unix_timestamp_nanos();
|
||||
let mut iter = database.rev_range_mut(wtxn, &(..earlier_than))?;
|
||||
while let Some((current, mut existing)) = iter.next().transpose()? {
|
||||
count -= existing.remove(task_id) as usize;
|
||||
|
||||
if existing.is_empty() {
|
||||
// safety: We don't keep references to the database
|
||||
unsafe { iter.del_current()? };
|
||||
} else {
|
||||
// safety: We don't keep references to the database
|
||||
unsafe { iter.put_current(¤t, &existing)? };
|
||||
}
|
||||
if count == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn keep_ids_within_datetimes(
|
||||
rtxn: &RoTxn,
|
||||
ids: &mut RoaringBitmap,
|
||||
@ -329,14 +359,27 @@ impl crate::IndexScheduler {
|
||||
kind,
|
||||
} = task;
|
||||
assert_eq!(uid, task.uid);
|
||||
if let Some(ref batch) = batch_uid {
|
||||
if task.status != Status::Enqueued {
|
||||
let batch_uid = batch_uid.expect("All non enqueued tasks must be part of a batch");
|
||||
assert!(self
|
||||
.queue
|
||||
.batch_to_tasks_mapping
|
||||
.get(&rtxn, batch)
|
||||
.get(&rtxn, &batch_uid)
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.contains(uid));
|
||||
let batch = self.queue.batches.get_batch(&rtxn, batch_uid).unwrap().unwrap();
|
||||
assert_eq!(batch.uid, batch_uid);
|
||||
if task.status == Status::Processing {
|
||||
assert!(batch.progress.is_some());
|
||||
} else {
|
||||
assert!(batch.progress.is_none());
|
||||
}
|
||||
assert_eq!(batch.started_at, task.started_at.unwrap());
|
||||
assert_eq!(batch.finished_at, task.finished_at);
|
||||
let enqueued_at = batch.enqueued_at.unwrap();
|
||||
assert!(task.enqueued_at >= enqueued_at.oldest);
|
||||
assert!(task.enqueued_at <= enqueued_at.earliest);
|
||||
}
|
||||
if let Some(task_index_uid) = &task_index_uid {
|
||||
assert!(self
|
||||
|
@ -1,9 +1,10 @@
|
||||
use crate::{upgrade::upgrade_index_scheduler, Result};
|
||||
use meilisearch_types::{
|
||||
heed::{types::Str, Database, Env, RoTxn, RwTxn},
|
||||
milli::heed_codec::version::VersionCodec,
|
||||
versioning,
|
||||
};
|
||||
use meilisearch_types::heed::types::Str;
|
||||
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::heed_codec::version::VersionCodec;
|
||||
use meilisearch_types::versioning;
|
||||
|
||||
use crate::upgrade::upgrade_index_scheduler;
|
||||
use crate::Result;
|
||||
|
||||
/// The number of database used by queue itself
|
||||
const NUMBER_OF_DATABASES: u32 = 1;
|
||||
@ -21,30 +22,38 @@ pub struct Versioning {
|
||||
}
|
||||
|
||||
impl Versioning {
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
pub const fn nb_db() -> u32 {
|
||||
NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
pub fn get_version(&self, rtxn: &RoTxn) -> Result<Option<(u32, u32, u32)>> {
|
||||
Ok(self.version.get(rtxn, entry_name::MAIN)?)
|
||||
pub fn get_version(&self, rtxn: &RoTxn) -> Result<Option<(u32, u32, u32)>, heed::Error> {
|
||||
self.version.get(rtxn, entry_name::MAIN)
|
||||
}
|
||||
|
||||
pub fn set_version(&self, wtxn: &mut RwTxn, version: (u32, u32, u32)) -> Result<()> {
|
||||
Ok(self.version.put(wtxn, entry_name::MAIN, &version)?)
|
||||
pub fn set_version(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
version: (u32, u32, u32),
|
||||
) -> Result<(), heed::Error> {
|
||||
self.version.put(wtxn, entry_name::MAIN, &version)
|
||||
}
|
||||
|
||||
pub fn set_current_version(&self, wtxn: &mut RwTxn) -> Result<()> {
|
||||
pub fn set_current_version(&self, wtxn: &mut RwTxn) -> Result<(), heed::Error> {
|
||||
let major = versioning::VERSION_MAJOR.parse().unwrap();
|
||||
let minor = versioning::VERSION_MINOR.parse().unwrap();
|
||||
let patch = versioning::VERSION_PATCH.parse().unwrap();
|
||||
self.set_version(wtxn, (major, minor, patch))
|
||||
}
|
||||
|
||||
/// Create an index scheduler and start its run loop.
|
||||
/// Return `Self` without checking anything about the version
|
||||
pub fn raw_new(env: &Env, wtxn: &mut RwTxn) -> Result<Self, heed::Error> {
|
||||
let version = env.create_database(wtxn, Some(db_name::VERSION))?;
|
||||
Ok(Self { version })
|
||||
}
|
||||
|
||||
pub(crate) fn new(env: &Env, db_version: (u32, u32, u32)) -> Result<Self> {
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let version = env.create_database(&mut wtxn, Some(db_name::VERSION))?;
|
||||
let this = Self { version };
|
||||
let this = Self::raw_new(env, &mut wtxn)?;
|
||||
let from = match this.get_version(&wtxn)? {
|
||||
Some(version) => version,
|
||||
// fresh DB: use the db version
|
||||
|
@ -24,9 +24,35 @@ pub struct Batch {
|
||||
pub started_at: OffsetDateTime,
|
||||
#[serde(with = "time::serde::rfc3339::option")]
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
|
||||
// Enqueued at is never displayed and is only required when removing a batch.
|
||||
// It's always some except when upgrading from a database pre v1.12
|
||||
pub enqueued_at: Option<BatchEnqueuedAt>,
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
impl PartialEq for Batch {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
let Self { uid, progress, details, stats, started_at, finished_at, enqueued_at } = self;
|
||||
|
||||
*uid == other.uid
|
||||
&& progress.is_none() == other.progress.is_none()
|
||||
&& details == &other.details
|
||||
&& stats == &other.stats
|
||||
&& started_at == &other.started_at
|
||||
&& finished_at == &other.finished_at
|
||||
&& enqueued_at == &other.enqueued_at
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct BatchEnqueuedAt {
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub earliest: OffsetDateTime,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub oldest: OffsetDateTime,
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct BatchStats {
|
||||
|
@ -193,6 +193,8 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError);
|
||||
merge_with_error_impl_take_error_message!(ParseTaskStatusError);
|
||||
merge_with_error_impl_take_error_message!(IndexUidFormatError);
|
||||
merge_with_error_impl_take_error_message!(InvalidMultiSearchWeight);
|
||||
merge_with_error_impl_take_error_message!(InvalidNetworkUrl);
|
||||
merge_with_error_impl_take_error_message!(InvalidNetworkSearchApiKey);
|
||||
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
|
||||
merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold);
|
||||
merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold);
|
||||
|
@ -260,7 +260,13 @@ InvalidMultiSearchMergeFacets , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchQueryFacets , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchQueryPosition , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchRemote , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkRemotes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkSelf , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkSearchApiKey , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkUrl , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
|
||||
@ -351,14 +357,22 @@ MissingDocumentId , InvalidRequest , BAD_REQUEST ;
|
||||
MissingFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
|
||||
MissingIndexUid , InvalidRequest , BAD_REQUEST ;
|
||||
MissingMasterKey , Auth , UNAUTHORIZED ;
|
||||
MissingNetworkUrl , InvalidRequest , BAD_REQUEST ;
|
||||
MissingPayload , InvalidRequest , BAD_REQUEST ;
|
||||
MissingSearchHybrid , InvalidRequest , BAD_REQUEST ;
|
||||
MissingSwapIndexes , InvalidRequest , BAD_REQUEST ;
|
||||
MissingTaskFilters , InvalidRequest , BAD_REQUEST ;
|
||||
NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY;
|
||||
PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
|
||||
RemoteBadResponse , System , BAD_GATEWAY ;
|
||||
RemoteBadRequest , InvalidRequest , BAD_REQUEST ;
|
||||
RemoteCouldNotSendRequest , System , BAD_GATEWAY ;
|
||||
RemoteInvalidApiKey , Auth , FORBIDDEN ;
|
||||
RemoteRemoteError , System , BAD_GATEWAY ;
|
||||
RemoteTimeout , System , BAD_GATEWAY ;
|
||||
TooManySearchRequests , System , SERVICE_UNAVAILABLE ;
|
||||
TaskNotFound , InvalidRequest , NOT_FOUND ;
|
||||
TaskFileNotFound , InvalidRequest , NOT_FOUND ;
|
||||
BatchNotFound , InvalidRequest , NOT_FOUND ;
|
||||
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
|
||||
TooManyVectors , InvalidRequest , BAD_REQUEST ;
|
||||
@ -583,6 +597,18 @@ impl fmt::Display for deserr_codes::InvalidSimilarRankingScoreThreshold {
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for deserr_codes::InvalidNetworkUrl {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "the value of `url` is invalid, expected a string.")
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for deserr_codes::InvalidNetworkSearchApiKey {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "the value of `searchApiKey` is invalid, expected a string.")
|
||||
}
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! internal_error {
|
||||
($target:ty : $($other:path), *) => {
|
||||
|
@ -1,3 +1,5 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
|
||||
@ -7,6 +9,8 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub logs_route: bool,
|
||||
pub edit_documents_by_function: bool,
|
||||
pub contains_filter: bool,
|
||||
pub network: bool,
|
||||
pub get_task_documents_route: bool,
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, Copy)]
|
||||
@ -15,3 +19,20 @@ pub struct InstanceTogglableFeatures {
|
||||
pub logs_route: bool,
|
||||
pub contains_filter: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Remote {
|
||||
pub url: String,
|
||||
#[serde(default)]
|
||||
pub search_api_key: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Network {
|
||||
#[serde(default, rename = "self")]
|
||||
pub local: Option<String>,
|
||||
#[serde(default)]
|
||||
pub remotes: BTreeMap<String, Remote>,
|
||||
}
|
||||
|
@ -4,13 +4,14 @@ use std::fmt;
|
||||
use std::str::FromStr;
|
||||
|
||||
use deserr::Deserr;
|
||||
use serde::Serialize;
|
||||
use utoipa::ToSchema;
|
||||
|
||||
use crate::error::{Code, ErrorCode};
|
||||
|
||||
/// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400
|
||||
/// bytes long
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord, ToSchema)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord, Serialize, ToSchema)]
|
||||
#[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)]
|
||||
#[schema(value_type = String, example = "movies")]
|
||||
pub struct IndexUid(String);
|
||||
|
@ -302,6 +302,12 @@ pub enum Action {
|
||||
#[serde(rename = "experimental.update")]
|
||||
#[deserr(rename = "experimental.update")]
|
||||
ExperimentalFeaturesUpdate,
|
||||
#[serde(rename = "network.get")]
|
||||
#[deserr(rename = "network.get")]
|
||||
NetworkGet,
|
||||
#[serde(rename = "network.update")]
|
||||
#[deserr(rename = "network.update")]
|
||||
NetworkUpdate,
|
||||
}
|
||||
|
||||
impl Action {
|
||||
@ -341,6 +347,8 @@ impl Action {
|
||||
KEYS_DELETE => Some(Self::KeysDelete),
|
||||
EXPERIMENTAL_FEATURES_GET => Some(Self::ExperimentalFeaturesGet),
|
||||
EXPERIMENTAL_FEATURES_UPDATE => Some(Self::ExperimentalFeaturesUpdate),
|
||||
NETWORK_GET => Some(Self::NetworkGet),
|
||||
NETWORK_UPDATE => Some(Self::NetworkUpdate),
|
||||
_otherwise => None,
|
||||
}
|
||||
}
|
||||
@ -386,4 +394,7 @@ pub mod actions {
|
||||
pub const KEYS_DELETE: u8 = KeysDelete.repr();
|
||||
pub const EXPERIMENTAL_FEATURES_GET: u8 = ExperimentalFeaturesGet.repr();
|
||||
pub const EXPERIMENTAL_FEATURES_UPDATE: u8 = ExperimentalFeaturesUpdate.repr();
|
||||
|
||||
pub const NETWORK_GET: u8 = NetworkGet.repr();
|
||||
pub const NETWORK_UPDATE: u8 = NetworkUpdate.repr();
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use std::fs;
|
||||
use std::io::{self, ErrorKind};
|
||||
use std::io::{ErrorKind, Write};
|
||||
use std::path::Path;
|
||||
|
||||
use milli::heed;
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
/// The name of the file that contains the version of the database.
|
||||
pub const VERSION_FILE_NAME: &str = "VERSION";
|
||||
|
||||
@ -10,37 +13,7 @@ pub static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR");
|
||||
pub static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH");
|
||||
|
||||
/// Persists the version of the current Meilisearch binary to a VERSION file
|
||||
pub fn update_version_file_for_dumpless_upgrade(
|
||||
db_path: &Path,
|
||||
from: (u32, u32, u32),
|
||||
to: (u32, u32, u32),
|
||||
) -> Result<(), VersionFileError> {
|
||||
let (from_major, from_minor, from_patch) = from;
|
||||
let (to_major, to_minor, to_patch) = to;
|
||||
|
||||
if from_major > to_major
|
||||
|| (from_major == to_major && from_minor > to_minor)
|
||||
|| (from_major == to_major && from_minor == to_minor && from_patch > to_patch)
|
||||
{
|
||||
Err(VersionFileError::DowngradeNotSupported {
|
||||
major: from_major,
|
||||
minor: from_minor,
|
||||
patch: from_patch,
|
||||
})
|
||||
} else if from_major < 1 || (from_major == to_major && from_minor < 12) {
|
||||
Err(VersionFileError::TooOldForAutomaticUpgrade {
|
||||
major: from_major,
|
||||
minor: from_minor,
|
||||
patch: from_patch,
|
||||
})
|
||||
} else {
|
||||
create_current_version_file(db_path)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Persists the version of the current Meilisearch binary to a VERSION file
|
||||
pub fn create_current_version_file(db_path: &Path) -> io::Result<()> {
|
||||
pub fn create_current_version_file(db_path: &Path) -> anyhow::Result<()> {
|
||||
create_version_file(db_path, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
|
||||
}
|
||||
|
||||
@ -49,9 +22,14 @@ pub fn create_version_file(
|
||||
major: &str,
|
||||
minor: &str,
|
||||
patch: &str,
|
||||
) -> io::Result<()> {
|
||||
) -> anyhow::Result<()> {
|
||||
let version_path = db_path.join(VERSION_FILE_NAME);
|
||||
fs::write(version_path, format!("{}.{}.{}", major, minor, patch))
|
||||
// In order to persist the file later we must create it in the `data.ms` and not in `/tmp`
|
||||
let mut file = NamedTempFile::new_in(db_path)?;
|
||||
file.write_all(format!("{}.{}.{}", major, minor, patch).as_bytes())?;
|
||||
file.flush()?;
|
||||
file.persist(version_path)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_version(db_path: &Path) -> Result<(u32, u32, u32), VersionFileError> {
|
||||
@ -61,7 +39,7 @@ pub fn get_version(db_path: &Path) -> Result<(u32, u32, u32), VersionFileError>
|
||||
Ok(version) => parse_version(&version),
|
||||
Err(error) => match error.kind() {
|
||||
ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile),
|
||||
_ => Err(error.into()),
|
||||
_ => Err(anyhow::Error::from(error).into()),
|
||||
},
|
||||
}
|
||||
}
|
||||
@ -112,7 +90,9 @@ pub enum VersionFileError {
|
||||
DowngradeNotSupported { major: u32, minor: u32, patch: u32 },
|
||||
#[error("Database version {major}.{minor}.{patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{major}.{minor}.{patch} and import it in the v{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}")]
|
||||
TooOldForAutomaticUpgrade { major: u32, minor: u32, patch: u32 },
|
||||
#[error("Error while modifying the database: {0}")]
|
||||
ErrorWhileModifyingTheDatabase(#[from] heed::Error),
|
||||
|
||||
#[error(transparent)]
|
||||
IoError(#[from] std::io::Error),
|
||||
AnyhowError(#[from] anyhow::Error),
|
||||
}
|
||||
|
@ -31,6 +31,7 @@ use crate::routes::{create_all_stats, Stats};
|
||||
use crate::Opt;
|
||||
|
||||
const ANALYTICS_HEADER: &str = "X-Meilisearch-Client";
|
||||
const MEILI_SERVER_PROVIDER: &str = "MEILI_SERVER_PROVIDER";
|
||||
|
||||
/// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors.
|
||||
fn write_user_id(db_path: &Path, user_id: &InstanceUid) {
|
||||
@ -195,6 +196,8 @@ struct Infos {
|
||||
experimental_reduce_indexing_memory_usage: bool,
|
||||
experimental_max_number_of_batched_tasks: usize,
|
||||
experimental_limit_batched_tasks_total_size: u64,
|
||||
experimental_network: bool,
|
||||
experimental_get_task_documents_route: bool,
|
||||
gpu_enabled: bool,
|
||||
db_path: bool,
|
||||
import_dump: bool,
|
||||
@ -285,6 +288,8 @@ impl Infos {
|
||||
logs_route,
|
||||
edit_documents_by_function,
|
||||
contains_filter,
|
||||
network,
|
||||
get_task_documents_route,
|
||||
} = features;
|
||||
|
||||
// We're going to override every sensible information.
|
||||
@ -302,6 +307,8 @@ impl Infos {
|
||||
experimental_replication_parameters,
|
||||
experimental_enable_logs_route: experimental_enable_logs_route | logs_route,
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_network: network,
|
||||
experimental_get_task_documents_route: get_task_documents_route,
|
||||
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
|
||||
db_path: db_path != PathBuf::from("./data.ms"),
|
||||
import_dump: import_dump.is_some(),
|
||||
@ -357,7 +364,7 @@ impl Segment {
|
||||
"cores": sys.cpus().len(),
|
||||
"ram_size": sys.total_memory(),
|
||||
"disk_size": disks.iter().map(|disk| disk.total_space()).max(),
|
||||
"server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
|
||||
"server_provider": std::env::var(MEILI_SERVER_PROVIDER).ok(),
|
||||
})
|
||||
});
|
||||
let number_of_documents =
|
||||
@ -380,10 +387,18 @@ impl Segment {
|
||||
index_scheduler: Arc<IndexScheduler>,
|
||||
auth_controller: Arc<AuthController>,
|
||||
) {
|
||||
const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
|
||||
// The first batch must be sent after one hour.
|
||||
let interval: Duration = match std::env::var(MEILI_SERVER_PROVIDER) {
|
||||
Ok(provider) if provider.starts_with("meili_cloud:") => {
|
||||
Duration::from_secs(60 * 60) // one hour
|
||||
}
|
||||
_ => {
|
||||
// We're an open source instance
|
||||
Duration::from_secs(60 * 60 * 24) // one day
|
||||
}
|
||||
};
|
||||
|
||||
let mut interval =
|
||||
tokio::time::interval_at(tokio::time::Instant::now() + INTERVAL, INTERVAL);
|
||||
tokio::time::interval_at(tokio::time::Instant::now() + interval, interval);
|
||||
|
||||
loop {
|
||||
select! {
|
||||
|
@ -32,6 +32,7 @@ use analytics::Analytics;
|
||||
use anyhow::bail;
|
||||
use error::PayloadError;
|
||||
use extractors::payload::PayloadConfig;
|
||||
use index_scheduler::versioning::Versioning;
|
||||
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::milli::constants::VERSION_MAJOR;
|
||||
@ -40,10 +41,9 @@ use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMetho
|
||||
use meilisearch_types::settings::apply_settings_to_builder;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use meilisearch_types::versioning::{
|
||||
create_current_version_file, get_version, update_version_file_for_dumpless_upgrade,
|
||||
VersionFileError, VERSION_MINOR, VERSION_PATCH,
|
||||
create_current_version_file, get_version, VersionFileError, VERSION_MINOR, VERSION_PATCH,
|
||||
};
|
||||
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
|
||||
use meilisearch_types::{compression, heed, milli, VERSION_FILE_NAME};
|
||||
pub use option::Opt;
|
||||
use option::ScheduleSnapshot;
|
||||
use search_queue::SearchQueue;
|
||||
@ -356,14 +356,19 @@ fn open_or_create_database_unchecked(
|
||||
|
||||
/// Ensures Meilisearch version is compatible with the database, returns an error in case of version mismatch.
|
||||
/// Returns the version that was contained in the version file
|
||||
fn check_version(opt: &Opt, binary_version: (u32, u32, u32)) -> anyhow::Result<(u32, u32, u32)> {
|
||||
fn check_version(
|
||||
opt: &Opt,
|
||||
index_scheduler_opt: &IndexSchedulerOptions,
|
||||
binary_version: (u32, u32, u32),
|
||||
) -> anyhow::Result<(u32, u32, u32)> {
|
||||
let (bin_major, bin_minor, bin_patch) = binary_version;
|
||||
let (db_major, db_minor, db_patch) = get_version(&opt.db_path)?;
|
||||
|
||||
if db_major != bin_major || db_minor != bin_minor || db_patch > bin_patch {
|
||||
if opt.experimental_dumpless_upgrade {
|
||||
update_version_file_for_dumpless_upgrade(
|
||||
&opt.db_path,
|
||||
opt,
|
||||
index_scheduler_opt,
|
||||
(db_major, db_minor, db_patch),
|
||||
(bin_major, bin_minor, bin_patch),
|
||||
)?;
|
||||
@ -380,6 +385,57 @@ fn check_version(opt: &Opt, binary_version: (u32, u32, u32)) -> anyhow::Result<(
|
||||
Ok((db_major, db_minor, db_patch))
|
||||
}
|
||||
|
||||
/// Persists the version of the current Meilisearch binary to a VERSION file
|
||||
pub fn update_version_file_for_dumpless_upgrade(
|
||||
opt: &Opt,
|
||||
index_scheduler_opt: &IndexSchedulerOptions,
|
||||
from: (u32, u32, u32),
|
||||
to: (u32, u32, u32),
|
||||
) -> Result<(), VersionFileError> {
|
||||
let (from_major, from_minor, from_patch) = from;
|
||||
let (to_major, to_minor, to_patch) = to;
|
||||
|
||||
// Early exit in case of error
|
||||
if from_major > to_major
|
||||
|| (from_major == to_major && from_minor > to_minor)
|
||||
|| (from_major == to_major && from_minor == to_minor && from_patch > to_patch)
|
||||
{
|
||||
return Err(VersionFileError::DowngradeNotSupported {
|
||||
major: from_major,
|
||||
minor: from_minor,
|
||||
patch: from_patch,
|
||||
});
|
||||
} else if from_major < 1 || (from_major == to_major && from_minor < 12) {
|
||||
return Err(VersionFileError::TooOldForAutomaticUpgrade {
|
||||
major: from_major,
|
||||
minor: from_minor,
|
||||
patch: from_patch,
|
||||
});
|
||||
}
|
||||
|
||||
// In the case of v1.12, the index-scheduler didn't store its internal version at the time.
|
||||
// => We must write it immediately **in the index-scheduler** otherwise we'll update the version file
|
||||
// there is a risk of DB corruption if a restart happens after writing the version file but before
|
||||
// writing the version in the index-scheduler. See <https://github.com/meilisearch/meilisearch/issues/5280>
|
||||
if from_major == 1 && from_minor == 12 {
|
||||
let env = unsafe {
|
||||
heed::EnvOpenOptions::new()
|
||||
.max_dbs(Versioning::nb_db())
|
||||
.map_size(index_scheduler_opt.task_db_size)
|
||||
.open(&index_scheduler_opt.tasks_path)
|
||||
}?;
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let versioning = Versioning::raw_new(&env, &mut wtxn)?;
|
||||
versioning.set_version(&mut wtxn, (from_major, from_minor, from_patch))?;
|
||||
wtxn.commit()?;
|
||||
// Should be instant since we're the only one using the env
|
||||
env.prepare_for_closing().wait();
|
||||
}
|
||||
|
||||
create_current_version_file(&opt.db_path)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Ensure you're in a valid state and open the IndexScheduler + AuthController for you.
|
||||
fn open_or_create_database(
|
||||
opt: &Opt,
|
||||
@ -387,7 +443,11 @@ fn open_or_create_database(
|
||||
empty_db: bool,
|
||||
binary_version: (u32, u32, u32),
|
||||
) -> anyhow::Result<(IndexScheduler, AuthController)> {
|
||||
let version = if !empty_db { check_version(opt, binary_version)? } else { binary_version };
|
||||
let version = if !empty_db {
|
||||
check_version(opt, &index_scheduler_opt, binary_version)?
|
||||
} else {
|
||||
binary_version
|
||||
};
|
||||
|
||||
open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version)
|
||||
}
|
||||
@ -431,10 +491,13 @@ fn import_dump(
|
||||
keys.push(key);
|
||||
}
|
||||
|
||||
// 3. Import the runtime features.
|
||||
// 3. Import the runtime features and network
|
||||
let features = dump_reader.features()?.unwrap_or_default();
|
||||
index_scheduler.put_runtime_features(features)?;
|
||||
|
||||
let network = dump_reader.network()?.cloned().unwrap_or_default();
|
||||
index_scheduler.put_network(network)?;
|
||||
|
||||
let indexer_config = index_scheduler.indexer_config();
|
||||
|
||||
// /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might
|
||||
@ -508,9 +571,15 @@ fn import_dump(
|
||||
index_scheduler.refresh_index_stats(&uid)?;
|
||||
}
|
||||
|
||||
// 5. Import the queue
|
||||
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
|
||||
// 5.1. Import the batches
|
||||
for ret in dump_reader.batches()? {
|
||||
let batch = ret?;
|
||||
index_scheduler_dump.register_dumped_batch(batch)?;
|
||||
}
|
||||
|
||||
// 5. Import the tasks.
|
||||
// 5.2. Import the tasks
|
||||
for ret in dump_reader.tasks()? {
|
||||
let (task, file) = ret?;
|
||||
index_scheduler_dump.register_dumped_task(task, file)?;
|
||||
|
@ -50,6 +50,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
logs_route: Some(false),
|
||||
edit_documents_by_function: Some(false),
|
||||
contains_filter: Some(false),
|
||||
network: Some(false),
|
||||
get_task_documents_route: Some(false),
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
@ -88,6 +90,10 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub edit_documents_by_function: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub contains_filter: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub network: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub get_task_documents_route: Option<bool>,
|
||||
}
|
||||
|
||||
impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
|
||||
@ -97,6 +103,8 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
|
||||
logs_route,
|
||||
edit_documents_by_function,
|
||||
contains_filter,
|
||||
network,
|
||||
get_task_documents_route,
|
||||
} = value;
|
||||
|
||||
Self {
|
||||
@ -104,6 +112,8 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
|
||||
logs_route: Some(logs_route),
|
||||
edit_documents_by_function: Some(edit_documents_by_function),
|
||||
contains_filter: Some(contains_filter),
|
||||
network: Some(network),
|
||||
get_task_documents_route: Some(get_task_documents_route),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -114,6 +124,8 @@ pub struct PatchExperimentalFeatureAnalytics {
|
||||
logs_route: bool,
|
||||
edit_documents_by_function: bool,
|
||||
contains_filter: bool,
|
||||
network: bool,
|
||||
get_task_documents_route: bool,
|
||||
}
|
||||
|
||||
impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
@ -127,6 +139,8 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
logs_route: new.logs_route,
|
||||
edit_documents_by_function: new.edit_documents_by_function,
|
||||
contains_filter: new.contains_filter,
|
||||
network: new.network,
|
||||
get_task_documents_route: new.get_task_documents_route,
|
||||
})
|
||||
}
|
||||
|
||||
@ -149,6 +163,8 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
logs_route: Some(false),
|
||||
edit_documents_by_function: Some(false),
|
||||
contains_filter: Some(false),
|
||||
network: Some(false),
|
||||
get_task_documents_route: Some(false),
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
@ -181,16 +197,23 @@ async fn patch_features(
|
||||
.edit_documents_by_function
|
||||
.unwrap_or(old_features.edit_documents_by_function),
|
||||
contains_filter: new_features.0.contains_filter.unwrap_or(old_features.contains_filter),
|
||||
network: new_features.0.network.unwrap_or(old_features.network),
|
||||
get_task_documents_route: new_features
|
||||
.0
|
||||
.get_task_documents_route
|
||||
.unwrap_or(old_features.get_task_documents_route),
|
||||
};
|
||||
|
||||
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
||||
// the it renames to camelCase, which we don't want for analytics.
|
||||
// it renames to camelCase, which we don't want for analytics.
|
||||
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
|
||||
let meilisearch_types::features::RuntimeTogglableFeatures {
|
||||
metrics,
|
||||
logs_route,
|
||||
edit_documents_by_function,
|
||||
contains_filter,
|
||||
network,
|
||||
get_task_documents_route,
|
||||
} = new_features;
|
||||
|
||||
analytics.publish(
|
||||
@ -199,6 +222,8 @@ async fn patch_features(
|
||||
logs_route,
|
||||
edit_documents_by_function,
|
||||
contains_filter,
|
||||
network,
|
||||
get_task_documents_route,
|
||||
},
|
||||
&req,
|
||||
);
|
||||
|
@ -496,6 +496,12 @@ pub struct IndexStats {
|
||||
pub number_of_documents: u64,
|
||||
/// Whether or not the index is currently ingesting document
|
||||
pub is_indexing: bool,
|
||||
/// Number of embeddings in the index
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub number_of_embeddings: Option<u64>,
|
||||
/// Number of embedded documents in the index
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub number_of_embedded_documents: Option<u64>,
|
||||
/// Association of every field name with the number of times it occurs in the documents.
|
||||
#[schema(value_type = HashMap<String, u64>)]
|
||||
pub field_distribution: FieldDistribution,
|
||||
@ -506,6 +512,8 @@ impl From<index_scheduler::IndexStats> for IndexStats {
|
||||
IndexStats {
|
||||
number_of_documents: stats.inner_stats.number_of_documents,
|
||||
is_indexing: stats.is_indexing,
|
||||
number_of_embeddings: stats.inner_stats.number_of_embeddings,
|
||||
number_of_embedded_documents: stats.inner_stats.number_of_embedded_documents,
|
||||
field_distribution: stats.inner_stats.field_distribution,
|
||||
}
|
||||
}
|
||||
@ -524,6 +532,8 @@ impl From<index_scheduler::IndexStats> for IndexStats {
|
||||
(status = OK, description = "The stats of the index", body = IndexStats, content_type = "application/json", example = json!(
|
||||
{
|
||||
"numberOfDocuments": 10,
|
||||
"numberOfEmbeddings": 10,
|
||||
"numberOfEmbeddedDocuments": 10,
|
||||
"isIndexing": true,
|
||||
"fieldDistribution": {
|
||||
"genre": 10,
|
||||
|
@ -34,6 +34,7 @@ use crate::routes::features::RuntimeTogglableFeatures;
|
||||
use crate::routes::indexes::documents::{DocumentDeletionByFilter, DocumentEditionByFunction};
|
||||
use crate::routes::indexes::IndexView;
|
||||
use crate::routes::multi_search::SearchResults;
|
||||
use crate::routes::network::{Network, Remote};
|
||||
use crate::routes::swap_indexes::SwapIndexesPayload;
|
||||
use crate::search::{
|
||||
FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
|
||||
@ -54,6 +55,7 @@ mod logs;
|
||||
mod metrics;
|
||||
mod multi_search;
|
||||
mod multi_search_analytics;
|
||||
pub mod network;
|
||||
mod open_api_utils;
|
||||
mod snapshot;
|
||||
mod swap_indexes;
|
||||
@ -75,6 +77,7 @@ pub mod tasks;
|
||||
(path = "/multi-search", api = multi_search::MultiSearchApi),
|
||||
(path = "/swap-indexes", api = swap_indexes::SwapIndexesApi),
|
||||
(path = "/experimental-features", api = features::ExperimentalFeaturesApi),
|
||||
(path = "/network", api = network::NetworkApi),
|
||||
),
|
||||
paths(get_health, get_version, get_stats),
|
||||
tags(
|
||||
@ -85,7 +88,7 @@ pub mod tasks;
|
||||
url = "/",
|
||||
description = "Local server",
|
||||
)),
|
||||
components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind))
|
||||
components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote))
|
||||
)]
|
||||
pub struct MeilisearchApi;
|
||||
|
||||
@ -103,7 +106,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
.service(web::scope("/multi-search").configure(multi_search::configure))
|
||||
.service(web::scope("/swap-indexes").configure(swap_indexes::configure))
|
||||
.service(web::scope("/metrics").configure(metrics::configure))
|
||||
.service(web::scope("/experimental-features").configure(features::configure));
|
||||
.service(web::scope("/experimental-features").configure(features::configure))
|
||||
.service(web::scope("/network").configure(network::configure));
|
||||
|
||||
#[cfg(feature = "swagger")]
|
||||
{
|
||||
@ -359,9 +363,9 @@ pub async fn running() -> HttpResponse {
|
||||
#[derive(Serialize, Debug, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Stats {
|
||||
/// The size of the database, in bytes.
|
||||
/// The disk space used by the database, in bytes.
|
||||
pub database_size: u64,
|
||||
#[serde(skip)]
|
||||
/// The size of the database, in bytes.
|
||||
pub used_database_size: u64,
|
||||
/// The date of the last update in the RFC 3339 formats. Can be `null` if no update has ever been processed.
|
||||
#[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
|
||||
@ -383,6 +387,7 @@ pub struct Stats {
|
||||
(status = 200, description = "The stats of the instance", body = Stats, content_type = "application/json", example = json!(
|
||||
{
|
||||
"databaseSize": 567,
|
||||
"usedDatabaseSize": 456,
|
||||
"lastUpdate": "2019-11-20T09:40:33.711324Z",
|
||||
"indexes": {
|
||||
"movies": {
|
||||
|
@ -20,6 +20,7 @@ use crate::routes::indexes::search::search_kind;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_federated_search, perform_search, FederatedSearch,
|
||||
FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
|
||||
PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
@ -48,6 +49,7 @@ pub struct SearchResults {
|
||||
/// Bundle multiple search queries in a single API request. Use this endpoint to search through multiple indexes at once.
|
||||
#[utoipa::path(
|
||||
post,
|
||||
request_body = FederatedSearch,
|
||||
path = "",
|
||||
tag = "Multi-search",
|
||||
security(("Bearer" = ["search", "*"])),
|
||||
@ -186,18 +188,22 @@ pub async fn multi_search_with_post(
|
||||
|
||||
let response = match federation {
|
||||
Some(federation) => {
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_federated_search(&index_scheduler, queries, federation, features)
|
||||
})
|
||||
.await;
|
||||
// check remote header
|
||||
let is_proxy = req
|
||||
.headers()
|
||||
.get(PROXY_SEARCH_HEADER)
|
||||
.is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes());
|
||||
let search_result =
|
||||
perform_federated_search(&index_scheduler, queries, federation, features, is_proxy)
|
||||
.await;
|
||||
permit.drop().await;
|
||||
|
||||
if let Ok(Ok(_)) = search_result {
|
||||
if search_result.is_ok() {
|
||||
multi_aggregate.succeed();
|
||||
}
|
||||
|
||||
analytics.publish(multi_aggregate, &req);
|
||||
HttpResponse::Ok().json(search_result??)
|
||||
HttpResponse::Ok().json(search_result?)
|
||||
}
|
||||
None => {
|
||||
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
|
||||
|
@ -13,6 +13,8 @@ pub struct MultiSearchAggregator {
|
||||
|
||||
// sum of the number of distinct indexes in each single request, use with total_received to compute an avg
|
||||
total_distinct_index_count: usize,
|
||||
// sum of the number of distinct remotes in each single request, use with total_received to compute an avg
|
||||
total_distinct_remote_count: usize,
|
||||
// number of queries with a single index, use with total_received to compute a proportion
|
||||
total_single_index: usize,
|
||||
|
||||
@ -31,46 +33,49 @@ impl MultiSearchAggregator {
|
||||
pub fn from_federated_search(federated_search: &FederatedSearch) -> Self {
|
||||
let use_federation = federated_search.federation.is_some();
|
||||
|
||||
let distinct_indexes: HashSet<_> = federated_search
|
||||
.queries
|
||||
.iter()
|
||||
.map(|query| {
|
||||
let query = &query;
|
||||
// make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex
|
||||
let SearchQueryWithIndex {
|
||||
index_uid,
|
||||
federation_options: _,
|
||||
q: _,
|
||||
vector: _,
|
||||
offset: _,
|
||||
limit: _,
|
||||
page: _,
|
||||
hits_per_page: _,
|
||||
attributes_to_retrieve: _,
|
||||
retrieve_vectors: _,
|
||||
attributes_to_crop: _,
|
||||
crop_length: _,
|
||||
attributes_to_highlight: _,
|
||||
show_ranking_score: _,
|
||||
show_ranking_score_details: _,
|
||||
show_matches_position: _,
|
||||
filter: _,
|
||||
sort: _,
|
||||
distinct: _,
|
||||
facets: _,
|
||||
highlight_pre_tag: _,
|
||||
highlight_post_tag: _,
|
||||
crop_marker: _,
|
||||
matching_strategy: _,
|
||||
attributes_to_search_on: _,
|
||||
hybrid: _,
|
||||
ranking_score_threshold: _,
|
||||
locales: _,
|
||||
} = query;
|
||||
let mut distinct_indexes = HashSet::with_capacity(federated_search.queries.len());
|
||||
let mut distinct_remotes = HashSet::with_capacity(federated_search.queries.len());
|
||||
|
||||
index_uid.as_str()
|
||||
})
|
||||
.collect();
|
||||
// make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex
|
||||
for SearchQueryWithIndex {
|
||||
index_uid,
|
||||
federation_options,
|
||||
q: _,
|
||||
vector: _,
|
||||
offset: _,
|
||||
limit: _,
|
||||
page: _,
|
||||
hits_per_page: _,
|
||||
attributes_to_retrieve: _,
|
||||
retrieve_vectors: _,
|
||||
attributes_to_crop: _,
|
||||
crop_length: _,
|
||||
attributes_to_highlight: _,
|
||||
show_ranking_score: _,
|
||||
show_ranking_score_details: _,
|
||||
show_matches_position: _,
|
||||
filter: _,
|
||||
sort: _,
|
||||
distinct: _,
|
||||
facets: _,
|
||||
highlight_pre_tag: _,
|
||||
highlight_post_tag: _,
|
||||
crop_marker: _,
|
||||
matching_strategy: _,
|
||||
attributes_to_search_on: _,
|
||||
hybrid: _,
|
||||
ranking_score_threshold: _,
|
||||
locales: _,
|
||||
} in &federated_search.queries
|
||||
{
|
||||
if let Some(federation_options) = federation_options {
|
||||
if let Some(remote) = &federation_options.remote {
|
||||
distinct_remotes.insert(remote.as_str());
|
||||
}
|
||||
}
|
||||
|
||||
distinct_indexes.insert(index_uid.as_str());
|
||||
}
|
||||
|
||||
let show_ranking_score =
|
||||
federated_search.queries.iter().any(|query| query.show_ranking_score);
|
||||
@ -81,6 +86,7 @@ impl MultiSearchAggregator {
|
||||
total_received: 1,
|
||||
total_succeeded: 0,
|
||||
total_distinct_index_count: distinct_indexes.len(),
|
||||
total_distinct_remote_count: distinct_remotes.len(),
|
||||
total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
|
||||
total_search_count: federated_search.queries.len(),
|
||||
show_ranking_score,
|
||||
@ -110,6 +116,8 @@ impl Aggregate for MultiSearchAggregator {
|
||||
let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded);
|
||||
let total_distinct_index_count =
|
||||
this.total_distinct_index_count.saturating_add(new.total_distinct_index_count);
|
||||
let total_distinct_remote_count =
|
||||
this.total_distinct_remote_count.saturating_add(new.total_distinct_remote_count);
|
||||
let total_single_index = this.total_single_index.saturating_add(new.total_single_index);
|
||||
let total_search_count = this.total_search_count.saturating_add(new.total_search_count);
|
||||
let show_ranking_score = this.show_ranking_score || new.show_ranking_score;
|
||||
@ -121,6 +129,7 @@ impl Aggregate for MultiSearchAggregator {
|
||||
total_received,
|
||||
total_succeeded,
|
||||
total_distinct_index_count,
|
||||
total_distinct_remote_count,
|
||||
total_single_index,
|
||||
total_search_count,
|
||||
show_ranking_score,
|
||||
@ -134,6 +143,7 @@ impl Aggregate for MultiSearchAggregator {
|
||||
total_received,
|
||||
total_succeeded,
|
||||
total_distinct_index_count,
|
||||
total_distinct_remote_count,
|
||||
total_single_index,
|
||||
total_search_count,
|
||||
show_ranking_score,
|
||||
@ -152,6 +162,10 @@ impl Aggregate for MultiSearchAggregator {
|
||||
"total_distinct_index_count": total_distinct_index_count,
|
||||
"avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early
|
||||
},
|
||||
"remotes": {
|
||||
"total_distinct_remote_count": total_distinct_remote_count,
|
||||
"avg_distinct_remote_count": (total_distinct_remote_count as f64) / (total_received as f64), // not 0 else returned early
|
||||
},
|
||||
"searches": {
|
||||
"total_search_count": total_search_count,
|
||||
"avg_search_count": (total_search_count as f64) / (total_received as f64),
|
||||
|
261
crates/meilisearch/src/routes/network.rs
Normal file
261
crates/meilisearch/src/routes/network.rs
Normal file
@ -0,0 +1,261 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use actix_web::web::{self, Data};
|
||||
use actix_web::{HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use deserr::Deserr;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use itertools::{EitherOrBoth, Itertools};
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::{
|
||||
InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkUrl,
|
||||
};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::features::{Network as DbNetwork, Remote as DbRemote};
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use serde::Serialize;
|
||||
use tracing::debug;
|
||||
use utoipa::{OpenApi, ToSchema};
|
||||
|
||||
use crate::analytics::{Aggregate, Analytics};
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
|
||||
#[derive(OpenApi)]
|
||||
#[openapi(
|
||||
paths(get_network, patch_network),
|
||||
tags((
|
||||
name = "Network",
|
||||
description = "The `/network` route allows you to describe the topology of a network of Meilisearch instances.
|
||||
|
||||
This route is **synchronous**. This means that no task object will be returned, and any change to the network will be made available immediately.",
|
||||
external_docs(url = "https://www.meilisearch.com/docs/reference/api/network"),
|
||||
)),
|
||||
)]
|
||||
pub struct NetworkApi;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
web::resource("")
|
||||
.route(web::get().to(get_network))
|
||||
.route(web::patch().to(SeqHandler(patch_network))),
|
||||
);
|
||||
}
|
||||
|
||||
/// Get network topology
|
||||
///
|
||||
/// Get a list of all Meilisearch instances currently known to this instance.
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "",
|
||||
tag = "Network",
|
||||
security(("Bearer" = ["network.get", "network.*", "*"])),
|
||||
responses(
|
||||
(status = OK, description = "Known nodes are returned", body = Network, content_type = "application/json", example = json!(
|
||||
{
|
||||
"self": "ms-0",
|
||||
"remotes": {
|
||||
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
|
||||
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
|
||||
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
|
||||
}
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
"message": "The Authorization header is missing. It must use the bearer authorization method.",
|
||||
"code": "missing_authorization_header",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
||||
}
|
||||
)),
|
||||
)
|
||||
)]
|
||||
async fn get_network(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::NETWORK_GET }>, Data<IndexScheduler>>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_network("Using the /network route")?;
|
||||
|
||||
let network = index_scheduler.network();
|
||||
debug!(returns = ?network, "Get network");
|
||||
Ok(HttpResponse::Ok().json(network))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr, ToSchema, Serialize)]
|
||||
#[deserr(error = DeserrJsonError<InvalidNetworkRemotes>, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct Remote {
|
||||
#[schema(value_type = Option<String>, example = json!({
|
||||
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
|
||||
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
|
||||
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
|
||||
}))]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidNetworkUrl>)]
|
||||
#[serde(default)]
|
||||
pub url: Setting<String>,
|
||||
#[schema(value_type = Option<String>, example = json!("XWnBI8QHUc-4IlqbKPLUDuhftNq19mQtjc6JvmivzJU"))]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidNetworkSearchApiKey>)]
|
||||
#[serde(default)]
|
||||
pub search_api_key: Setting<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr, ToSchema, Serialize)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct Network {
|
||||
#[schema(value_type = Option<BTreeMap<String, Remote>>, example = json!("http://localhost:7700"))]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidNetworkRemotes>)]
|
||||
#[serde(default)]
|
||||
pub remotes: Setting<BTreeMap<String, Option<Remote>>>,
|
||||
#[schema(value_type = Option<String>, example = json!("ms-00"), rename = "self")]
|
||||
#[serde(default, rename = "self")]
|
||||
#[deserr(default, rename = "self", error = DeserrJsonError<InvalidNetworkSelf>)]
|
||||
pub local: Setting<String>,
|
||||
}
|
||||
|
||||
impl Remote {
|
||||
pub fn try_into_db_node(self, name: &str) -> Result<DbRemote, ResponseError> {
|
||||
Ok(DbRemote {
|
||||
url: self.url.set().ok_or(ResponseError::from_msg(
|
||||
format!("Missing field `.remotes.{name}.url`"),
|
||||
meilisearch_types::error::Code::MissingNetworkUrl,
|
||||
))?,
|
||||
search_api_key: self.search_api_key.set(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct PatchNetworkAnalytics {
|
||||
network_size: usize,
|
||||
network_has_self: bool,
|
||||
}
|
||||
|
||||
impl Aggregate for PatchNetworkAnalytics {
|
||||
fn event_name(&self) -> &'static str {
|
||||
"Network Updated"
|
||||
}
|
||||
|
||||
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
|
||||
Box::new(Self { network_size: new.network_size, network_has_self: new.network_has_self })
|
||||
}
|
||||
|
||||
fn into_event(self: Box<Self>) -> serde_json::Value {
|
||||
serde_json::to_value(*self).unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Configure Network
|
||||
///
|
||||
/// Add or remove nodes from network.
|
||||
#[utoipa::path(
|
||||
patch,
|
||||
path = "",
|
||||
tag = "Network",
|
||||
request_body = Network,
|
||||
security(("Bearer" = ["network.update", "network.*", "*"])),
|
||||
responses(
|
||||
(status = OK, description = "New network state is returned", body = Network, content_type = "application/json", example = json!(
|
||||
{
|
||||
"self": "ms-0",
|
||||
"remotes": {
|
||||
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
|
||||
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
|
||||
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
|
||||
}
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
"message": "The Authorization header is missing. It must use the bearer authorization method.",
|
||||
"code": "missing_authorization_header",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
||||
}
|
||||
)),
|
||||
)
|
||||
)]
|
||||
async fn patch_network(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::NETWORK_UPDATE }>, Data<IndexScheduler>>,
|
||||
new_network: AwebJson<Network, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
analytics: Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_network("Using the /network route")?;
|
||||
|
||||
let new_network = new_network.0;
|
||||
let old_network = index_scheduler.network();
|
||||
debug!(parameters = ?new_network, "Patch network");
|
||||
|
||||
let merged_self = match new_network.local {
|
||||
Setting::Set(new_self) => Some(new_self),
|
||||
Setting::Reset => None,
|
||||
Setting::NotSet => old_network.local,
|
||||
};
|
||||
|
||||
let merged_remotes = match new_network.remotes {
|
||||
Setting::Set(new_remotes) => {
|
||||
let mut merged_remotes = BTreeMap::new();
|
||||
for either_or_both in old_network
|
||||
.remotes
|
||||
.into_iter()
|
||||
.merge_join_by(new_remotes.into_iter(), |left, right| left.0.cmp(&right.0))
|
||||
{
|
||||
match either_or_both {
|
||||
EitherOrBoth::Both((key, old), (_, Some(new))) => {
|
||||
let DbRemote { url: old_url, search_api_key: old_search_api_key } = old;
|
||||
|
||||
let Remote { url: new_url, search_api_key: new_search_api_key } = new;
|
||||
|
||||
let merged = DbRemote {
|
||||
url: match new_url {
|
||||
Setting::Set(new_url) => new_url,
|
||||
Setting::Reset => {
|
||||
return Err(ResponseError::from_msg(
|
||||
format!(
|
||||
"Field `.remotes.{key}.url` cannot be set to `null`"
|
||||
),
|
||||
meilisearch_types::error::Code::InvalidNetworkUrl,
|
||||
))
|
||||
}
|
||||
Setting::NotSet => old_url,
|
||||
},
|
||||
search_api_key: match new_search_api_key {
|
||||
Setting::Set(new_search_api_key) => Some(new_search_api_key),
|
||||
Setting::Reset => None,
|
||||
Setting::NotSet => old_search_api_key,
|
||||
},
|
||||
};
|
||||
merged_remotes.insert(key, merged);
|
||||
}
|
||||
EitherOrBoth::Both((_, _), (_, None)) | EitherOrBoth::Right((_, None)) => {}
|
||||
EitherOrBoth::Left((key, node)) => {
|
||||
merged_remotes.insert(key, node);
|
||||
}
|
||||
EitherOrBoth::Right((key, Some(node))) => {
|
||||
let node = node.try_into_db_node(&key)?;
|
||||
merged_remotes.insert(key, node);
|
||||
}
|
||||
}
|
||||
}
|
||||
merged_remotes
|
||||
}
|
||||
Setting::Reset => BTreeMap::new(),
|
||||
Setting::NotSet => old_network.remotes,
|
||||
};
|
||||
|
||||
analytics.publish(
|
||||
PatchNetworkAnalytics {
|
||||
network_size: merged_remotes.len(),
|
||||
network_has_self: merged_self.is_some(),
|
||||
},
|
||||
&req,
|
||||
);
|
||||
|
||||
let merged_network = DbNetwork { local: merged_self, remotes: merged_remotes };
|
||||
index_scheduler.put_network(merged_network.clone())?;
|
||||
debug!(returns = ?merged_network, "Patch network");
|
||||
Ok(HttpResponse::Ok().json(merged_network))
|
||||
}
|
@ -1,3 +1,5 @@
|
||||
use std::io::ErrorKind;
|
||||
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::AwebQueryParameter;
|
||||
@ -16,6 +18,7 @@ use serde::Serialize;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
use time::macros::format_description;
|
||||
use time::{Date, Duration, OffsetDateTime, Time};
|
||||
use tokio::io::AsyncReadExt;
|
||||
use tokio::task;
|
||||
use utoipa::{IntoParams, OpenApi, ToSchema};
|
||||
|
||||
@ -44,7 +47,11 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
.route(web::delete().to(SeqHandler(delete_tasks))),
|
||||
)
|
||||
.service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
|
||||
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
|
||||
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))))
|
||||
.service(
|
||||
web::resource("/{task_id}/documents")
|
||||
.route(web::get().to(SeqHandler(get_task_documents_file))),
|
||||
);
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr, IntoParams)]
|
||||
@ -639,6 +646,76 @@ async fn get_task(
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a task's documents.
|
||||
///
|
||||
/// Get a [task's documents file](https://www.meilisearch.com/docs/learn/async/asynchronous_operations).
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/{taskUid}/documents",
|
||||
tag = "Tasks",
|
||||
security(("Bearer" = ["tasks.get", "tasks.*", "*"])),
|
||||
params(("taskUid", format = UInt32, example = 0, description = "The task identifier", nullable = false)),
|
||||
responses(
|
||||
(status = 200, description = "The content of the task update", body = serde_json::Value, content_type = "application/x-ndjson"),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
"message": "The Authorization header is missing. It must use the bearer authorization method.",
|
||||
"code": "missing_authorization_header",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
||||
}
|
||||
)),
|
||||
(status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
"message": "Task :taskUid not found.",
|
||||
"code": "task_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors/#task_not_found"
|
||||
}
|
||||
))
|
||||
)
|
||||
)]
|
||||
async fn get_task_documents_file(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>,
|
||||
task_uid: web::Path<String>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_get_task_documents_route()?;
|
||||
let task_uid_string = task_uid.into_inner();
|
||||
|
||||
let task_uid: TaskId = match task_uid_string.parse() {
|
||||
Ok(id) => id,
|
||||
Err(_e) => {
|
||||
return Err(index_scheduler::Error::InvalidTaskUid { task_uid: task_uid_string }.into())
|
||||
}
|
||||
};
|
||||
|
||||
let query = index_scheduler::Query { uids: Some(vec![task_uid]), ..Query::default() };
|
||||
let filters = index_scheduler.filters();
|
||||
let (tasks, _) = index_scheduler.get_tasks_from_authorized_indexes(&query, filters)?;
|
||||
|
||||
if let Some(task) = tasks.first() {
|
||||
match task.content_uuid() {
|
||||
Some(uuid) => {
|
||||
let mut tfile = match index_scheduler.queue.update_file(uuid) {
|
||||
Ok(file) => tokio::fs::File::from_std(file),
|
||||
Err(file_store::Error::IoError(e)) if e.kind() == ErrorKind::NotFound => {
|
||||
return Err(index_scheduler::Error::TaskFileNotFound(task_uid).into())
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
// Yes, that's awful to put everything in memory when we could have streamed it from
|
||||
// disk but it's really (really) complex to do with the current state of async Rust.
|
||||
let mut content = String::new();
|
||||
tfile.read_to_string(&mut content).await?;
|
||||
Ok(HttpResponse::Ok().content_type("application/x-ndjson").body(content))
|
||||
}
|
||||
None => Err(index_scheduler::Error::TaskFileNotFound(task_uid).into()),
|
||||
}
|
||||
} else {
|
||||
Err(index_scheduler::Error::TaskNotFound(task_uid).into())
|
||||
}
|
||||
}
|
||||
|
||||
pub enum DeserializeDateOption {
|
||||
Before,
|
||||
After,
|
||||
|
@ -1,923 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt;
|
||||
use std::iter::Zip;
|
||||
use std::rc::Rc;
|
||||
use std::str::FromStr as _;
|
||||
use std::time::Duration;
|
||||
use std::vec::{IntoIter, Vec};
|
||||
|
||||
use actix_http::StatusCode;
|
||||
use index_scheduler::{IndexScheduler, RoFeatures};
|
||||
use indexmap::IndexMap;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::{
|
||||
InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet,
|
||||
InvalidMultiSearchMergeFacets, InvalidMultiSearchWeight, InvalidSearchLimit,
|
||||
InvalidSearchOffset,
|
||||
};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue};
|
||||
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget};
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::Serialize;
|
||||
use utoipa::ToSchema;
|
||||
|
||||
use super::ranking_rules::{self, RankingRules};
|
||||
use super::{
|
||||
compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, FacetStats,
|
||||
HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex,
|
||||
};
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::routes::indexes::search::search_kind;
|
||||
|
||||
pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;
|
||||
|
||||
#[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct FederationOptions {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)]
|
||||
#[schema(value_type = f64)]
|
||||
pub weight: Weight,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
|
||||
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)]
|
||||
pub struct Weight(f64);
|
||||
|
||||
impl Default for Weight {
|
||||
fn default() -> Self {
|
||||
Weight(DEFAULT_FEDERATED_WEIGHT)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::convert::TryFrom<f64> for Weight {
|
||||
type Error = InvalidMultiSearchWeight;
|
||||
|
||||
fn try_from(f: f64) -> Result<Self, Self::Error> {
|
||||
if f < 0.0 {
|
||||
Err(InvalidMultiSearchWeight)
|
||||
} else {
|
||||
Ok(Weight(f))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for Weight {
|
||||
type Target = f64;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, deserr::Deserr, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct Federation {
|
||||
#[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
|
||||
pub limit: usize,
|
||||
#[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
|
||||
pub offset: usize,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchFacetsByIndex>)]
|
||||
pub facets_by_index: BTreeMap<IndexUid, Option<Vec<String>>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeFacets>)]
|
||||
pub merge_facets: Option<MergeFacets>,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, deserr::Deserr, Default, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError<InvalidMultiSearchMergeFacets>, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct MergeFacets {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMaxValuesPerFacet>)]
|
||||
pub max_values_per_facet: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, deserr::Deserr, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct FederatedSearch {
|
||||
pub queries: Vec<SearchQueryWithIndex>,
|
||||
#[deserr(default)]
|
||||
pub federation: Option<Federation>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct FederatedSearchResult {
|
||||
pub hits: Vec<SearchHit>,
|
||||
pub processing_time_ms: u128,
|
||||
#[serde(flatten)]
|
||||
pub hits_info: HitsInfo,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub semantic_hit_count: Option<u32>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[schema(value_type = Option<BTreeMap<String, BTreeMap<String, u64>>>)]
|
||||
pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
|
||||
#[serde(skip_serializing_if = "FederatedFacets::is_empty")]
|
||||
pub facets_by_index: FederatedFacets,
|
||||
|
||||
// These fields are only used for analytics purposes
|
||||
#[serde(skip)]
|
||||
pub degraded: bool,
|
||||
#[serde(skip)]
|
||||
pub used_negative_operator: bool,
|
||||
}
|
||||
|
||||
impl fmt::Debug for FederatedSearchResult {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let FederatedSearchResult {
|
||||
hits,
|
||||
processing_time_ms,
|
||||
hits_info,
|
||||
semantic_hit_count,
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
facet_distribution,
|
||||
facet_stats,
|
||||
facets_by_index,
|
||||
} = self;
|
||||
|
||||
let mut debug = f.debug_struct("SearchResult");
|
||||
// The most important thing when looking at a search result is the time it took to process
|
||||
debug.field("processing_time_ms", &processing_time_ms);
|
||||
debug.field("hits", &format!("[{} hits returned]", hits.len()));
|
||||
debug.field("hits_info", &hits_info);
|
||||
if *used_negative_operator {
|
||||
debug.field("used_negative_operator", used_negative_operator);
|
||||
}
|
||||
if *degraded {
|
||||
debug.field("degraded", degraded);
|
||||
}
|
||||
if let Some(facet_distribution) = facet_distribution {
|
||||
debug.field("facet_distribution", &facet_distribution);
|
||||
}
|
||||
if let Some(facet_stats) = facet_stats {
|
||||
debug.field("facet_stats", &facet_stats);
|
||||
}
|
||||
if let Some(semantic_hit_count) = semantic_hit_count {
|
||||
debug.field("semantic_hit_count", &semantic_hit_count);
|
||||
}
|
||||
if !facets_by_index.is_empty() {
|
||||
debug.field("facets_by_index", &facets_by_index);
|
||||
}
|
||||
|
||||
debug.finish()
|
||||
}
|
||||
}
|
||||
|
||||
struct WeightedScore<'a> {
|
||||
details: &'a [ScoreDetails],
|
||||
weight: f64,
|
||||
}
|
||||
|
||||
impl<'a> WeightedScore<'a> {
|
||||
pub fn new(details: &'a [ScoreDetails], weight: f64) -> Self {
|
||||
Self { details, weight }
|
||||
}
|
||||
|
||||
pub fn weighted_global_score(&self) -> f64 {
|
||||
ScoreDetails::global_score(self.details.iter()) * self.weight
|
||||
}
|
||||
|
||||
pub fn compare_weighted_global_scores(&self, other: &Self) -> Ordering {
|
||||
self.weighted_global_score()
|
||||
.partial_cmp(&other.weighted_global_score())
|
||||
// both are numbers, possibly infinite
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn compare(&self, other: &Self) -> Ordering {
|
||||
let mut left_it = ScoreDetails::score_values(self.details.iter());
|
||||
let mut right_it = ScoreDetails::score_values(other.details.iter());
|
||||
|
||||
loop {
|
||||
let left = left_it.next();
|
||||
let right = right_it.next();
|
||||
|
||||
match (left, right) {
|
||||
(None, None) => return Ordering::Equal,
|
||||
(None, Some(_)) => return Ordering::Less,
|
||||
(Some(_), None) => return Ordering::Greater,
|
||||
(Some(ScoreValue::Score(left)), Some(ScoreValue::Score(right))) => {
|
||||
let left = left * self.weight;
|
||||
let right = right * other.weight;
|
||||
if (left - right).abs() <= f64::EPSILON {
|
||||
continue;
|
||||
}
|
||||
return left.partial_cmp(&right).unwrap();
|
||||
}
|
||||
(Some(ScoreValue::Sort(left)), Some(ScoreValue::Sort(right))) => {
|
||||
match left.partial_cmp(right) {
|
||||
Some(Ordering::Equal) => continue,
|
||||
Some(order) => return order,
|
||||
None => return self.compare_weighted_global_scores(other),
|
||||
}
|
||||
}
|
||||
(Some(ScoreValue::GeoSort(left)), Some(ScoreValue::GeoSort(right))) => {
|
||||
match left.partial_cmp(right) {
|
||||
Some(Ordering::Equal) => continue,
|
||||
Some(order) => return order,
|
||||
None => {
|
||||
return self.compare_weighted_global_scores(other);
|
||||
}
|
||||
}
|
||||
}
|
||||
// not comparable details, use global
|
||||
(Some(ScoreValue::Score(_)), Some(_))
|
||||
| (Some(_), Some(ScoreValue::Score(_)))
|
||||
| (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_)))
|
||||
| (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => {
|
||||
let left_count = left_it.count();
|
||||
let right_count = right_it.count();
|
||||
// compare how many remaining groups of rules each side has.
|
||||
// the group with the most remaining groups wins.
|
||||
return left_count
|
||||
.cmp(&right_count)
|
||||
// breaks ties with the global ranking score
|
||||
.then_with(|| self.compare_weighted_global_scores(other));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct QueryByIndex {
|
||||
query: SearchQuery,
|
||||
federation_options: FederationOptions,
|
||||
query_index: usize,
|
||||
}
|
||||
|
||||
struct SearchResultByQuery<'a> {
|
||||
documents_ids: Vec<DocumentId>,
|
||||
document_scores: Vec<Vec<ScoreDetails>>,
|
||||
federation_options: FederationOptions,
|
||||
hit_maker: HitMaker<'a>,
|
||||
query_index: usize,
|
||||
}
|
||||
|
||||
struct SearchResultByQueryIter<'a> {
|
||||
it: Zip<IntoIter<DocumentId>, IntoIter<Vec<ScoreDetails>>>,
|
||||
federation_options: FederationOptions,
|
||||
hit_maker: Rc<HitMaker<'a>>,
|
||||
query_index: usize,
|
||||
}
|
||||
|
||||
impl<'a> SearchResultByQueryIter<'a> {
|
||||
fn new(
|
||||
SearchResultByQuery {
|
||||
documents_ids,
|
||||
document_scores,
|
||||
federation_options,
|
||||
hit_maker,
|
||||
query_index,
|
||||
}: SearchResultByQuery<'a>,
|
||||
) -> Self {
|
||||
let it = documents_ids.into_iter().zip(document_scores);
|
||||
Self { it, federation_options, hit_maker: Rc::new(hit_maker), query_index }
|
||||
}
|
||||
}
|
||||
|
||||
struct SearchResultByQueryIterItem<'a> {
|
||||
docid: DocumentId,
|
||||
score: Vec<ScoreDetails>,
|
||||
federation_options: FederationOptions,
|
||||
hit_maker: Rc<HitMaker<'a>>,
|
||||
query_index: usize,
|
||||
}
|
||||
|
||||
fn merge_index_local_results(
|
||||
results_by_query: Vec<SearchResultByQuery<'_>>,
|
||||
) -> impl Iterator<Item = SearchResultByQueryIterItem> + '_ {
|
||||
itertools::kmerge_by(
|
||||
results_by_query.into_iter().map(SearchResultByQueryIter::new),
|
||||
|left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| {
|
||||
let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
|
||||
let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);
|
||||
|
||||
match left_score.compare(&right_score) {
|
||||
// the biggest score goes first
|
||||
Ordering::Greater => true,
|
||||
// break ties using query index
|
||||
Ordering::Equal => left.query_index < right.query_index,
|
||||
Ordering::Less => false,
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
fn merge_index_global_results(
|
||||
results_by_index: Vec<SearchResultByIndex>,
|
||||
) -> impl Iterator<Item = SearchHitByIndex> {
|
||||
itertools::kmerge_by(
|
||||
results_by_index.into_iter().map(|result_by_index| result_by_index.hits.into_iter()),
|
||||
|left: &SearchHitByIndex, right: &SearchHitByIndex| {
|
||||
let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
|
||||
let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);
|
||||
|
||||
match left_score.compare(&right_score) {
|
||||
// the biggest score goes first
|
||||
Ordering::Greater => true,
|
||||
// break ties using query index
|
||||
Ordering::Equal => left.query_index < right.query_index,
|
||||
Ordering::Less => false,
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
impl<'a> Iterator for SearchResultByQueryIter<'a> {
|
||||
type Item = SearchResultByQueryIterItem<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let (docid, score) = self.it.next()?;
|
||||
Some(SearchResultByQueryIterItem {
|
||||
docid,
|
||||
score,
|
||||
federation_options: self.federation_options,
|
||||
hit_maker: Rc::clone(&self.hit_maker),
|
||||
query_index: self.query_index,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
struct SearchHitByIndex {
|
||||
hit: SearchHit,
|
||||
score: Vec<ScoreDetails>,
|
||||
federation_options: FederationOptions,
|
||||
query_index: usize,
|
||||
}
|
||||
|
||||
struct SearchResultByIndex {
|
||||
index: String,
|
||||
hits: Vec<SearchHitByIndex>,
|
||||
estimated_total_hits: usize,
|
||||
degraded: bool,
|
||||
used_negative_operator: bool,
|
||||
facets: Option<ComputedFacets>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, ToSchema)]
|
||||
pub struct FederatedFacets(pub BTreeMap<String, ComputedFacets>);
|
||||
|
||||
impl FederatedFacets {
|
||||
pub fn insert(&mut self, index: String, facets: Option<ComputedFacets>) {
|
||||
if let Some(facets) = facets {
|
||||
self.0.insert(index, facets);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.0.is_empty()
|
||||
}
|
||||
|
||||
pub fn merge(
|
||||
self,
|
||||
MergeFacets { max_values_per_facet }: MergeFacets,
|
||||
facet_order: BTreeMap<String, (String, OrderBy)>,
|
||||
) -> Option<ComputedFacets> {
|
||||
if self.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut distribution: BTreeMap<String, _> = Default::default();
|
||||
let mut stats: BTreeMap<String, FacetStats> = Default::default();
|
||||
|
||||
for facets_by_index in self.0.into_values() {
|
||||
for (facet, index_distribution) in facets_by_index.distribution {
|
||||
match distribution.entry(facet) {
|
||||
std::collections::btree_map::Entry::Vacant(entry) => {
|
||||
entry.insert(index_distribution);
|
||||
}
|
||||
std::collections::btree_map::Entry::Occupied(mut entry) => {
|
||||
let distribution = entry.get_mut();
|
||||
|
||||
for (value, index_count) in index_distribution {
|
||||
distribution
|
||||
.entry(value)
|
||||
.and_modify(|count| *count += index_count)
|
||||
.or_insert(index_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (facet, index_stats) in facets_by_index.stats {
|
||||
match stats.entry(facet) {
|
||||
std::collections::btree_map::Entry::Vacant(entry) => {
|
||||
entry.insert(index_stats);
|
||||
}
|
||||
std::collections::btree_map::Entry::Occupied(mut entry) => {
|
||||
let stats = entry.get_mut();
|
||||
|
||||
stats.min = f64::min(stats.min, index_stats.min);
|
||||
stats.max = f64::max(stats.max, index_stats.max);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fixup order
|
||||
for (facet, values) in &mut distribution {
|
||||
let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default();
|
||||
|
||||
match order_by {
|
||||
OrderBy::Lexicographic => {
|
||||
values.sort_unstable_by(|left, _, right, _| left.cmp(right))
|
||||
}
|
||||
OrderBy::Count => {
|
||||
values.sort_unstable_by(|_, left, _, right| {
|
||||
left.cmp(right)
|
||||
// biggest first
|
||||
.reverse()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(max_values_per_facet) = max_values_per_facet {
|
||||
values.truncate(max_values_per_facet)
|
||||
};
|
||||
}
|
||||
|
||||
Some(ComputedFacets { distribution, stats })
|
||||
}
|
||||
}
|
||||
|
||||
pub fn perform_federated_search(
|
||||
index_scheduler: &IndexScheduler,
|
||||
queries: Vec<SearchQueryWithIndex>,
|
||||
mut federation: Federation,
|
||||
features: RoFeatures,
|
||||
) -> Result<FederatedSearchResult, ResponseError> {
|
||||
let before_search = std::time::Instant::now();
|
||||
|
||||
// this implementation partition the queries by index to guarantee an important property:
|
||||
// - all the queries to a particular index use the same read transaction.
|
||||
// This is an important property, otherwise we cannot guarantee the self-consistency of the results.
|
||||
|
||||
// 1. partition queries by index
|
||||
let mut queries_by_index: BTreeMap<String, Vec<QueryByIndex>> = Default::default();
|
||||
for (query_index, federated_query) in queries.into_iter().enumerate() {
|
||||
if let Some(pagination_field) = federated_query.has_pagination() {
|
||||
return Err(MeilisearchHttpError::PaginationInFederatedQuery(
|
||||
query_index,
|
||||
pagination_field,
|
||||
)
|
||||
.into());
|
||||
}
|
||||
|
||||
if let Some(facets) = federated_query.has_facets() {
|
||||
let facets = facets.to_owned();
|
||||
return Err(MeilisearchHttpError::FacetsInFederatedQuery(
|
||||
query_index,
|
||||
federated_query.index_uid.into_inner(),
|
||||
facets,
|
||||
)
|
||||
.into());
|
||||
}
|
||||
|
||||
let (index_uid, query, federation_options) = federated_query.into_index_query_federation();
|
||||
|
||||
queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex {
|
||||
query,
|
||||
federation_options: federation_options.unwrap_or_default(),
|
||||
query_index,
|
||||
})
|
||||
}
|
||||
|
||||
// 2. perform queries, merge and make hits index by index
|
||||
let required_hit_count = federation.limit + federation.offset;
|
||||
|
||||
// In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic
|
||||
// Then in step (3), we'll update its value if there is any semantic search
|
||||
let mut semantic_hit_count = None;
|
||||
let mut results_by_index = Vec::with_capacity(queries_by_index.len());
|
||||
let mut previous_query_data: Option<(RankingRules, usize, String)> = None;
|
||||
|
||||
// remember the order and name of first index for each facet when merging with index settings
|
||||
// to detect if the order is inconsistent for a facet.
|
||||
let mut facet_order: Option<BTreeMap<String, (String, OrderBy)>> = match federation.merge_facets
|
||||
{
|
||||
Some(MergeFacets { .. }) => Some(Default::default()),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
for (index_uid, queries) in queries_by_index {
|
||||
let first_query_index = queries.first().map(|query| query.query_index);
|
||||
|
||||
let index = match index_scheduler.index(&index_uid) {
|
||||
Ok(index) => index,
|
||||
Err(err) => {
|
||||
let mut err = ResponseError::from(err);
|
||||
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
|
||||
// here the resource not found is not part of the URL.
|
||||
err.code = StatusCode::BAD_REQUEST;
|
||||
if let Some(query_index) = first_query_index {
|
||||
err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message);
|
||||
}
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
|
||||
// Important: this is the only transaction we'll use for this index during this federated search
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
let criteria = index.criteria(&rtxn)?;
|
||||
|
||||
let dictionary = index.dictionary(&rtxn)?;
|
||||
let dictionary: Option<Vec<_>> =
|
||||
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
let separators = index.allowed_separators(&rtxn)?;
|
||||
let separators: Option<Vec<_>> =
|
||||
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
|
||||
// each query gets its individual cutoff
|
||||
let cutoff = index.search_cutoff(&rtxn)?;
|
||||
|
||||
let mut degraded = false;
|
||||
let mut used_negative_operator = false;
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
|
||||
let facets_by_index = federation.facets_by_index.remove(&index_uid).flatten();
|
||||
|
||||
// TODO: recover the max size + facets_by_index as return value of this function so as not to ask it for all queries
|
||||
if let Err(mut error) =
|
||||
check_facet_order(&mut facet_order, &index_uid, &facets_by_index, &index, &rtxn)
|
||||
{
|
||||
error.message = format!(
|
||||
"Inside `.federation.facetsByIndex.{index_uid}`: {error}{}",
|
||||
if let Some(query_index) = first_query_index {
|
||||
format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`")
|
||||
} else {
|
||||
Default::default()
|
||||
}
|
||||
);
|
||||
return Err(error);
|
||||
}
|
||||
|
||||
// 2.1. Compute all candidates for each query in the index
|
||||
let mut results_by_query = Vec::with_capacity(queries.len());
|
||||
|
||||
for QueryByIndex { query, federation_options, query_index } in queries {
|
||||
// use an immediately invoked lambda to capture the result without returning from the function
|
||||
|
||||
let res: Result<(), ResponseError> = (|| {
|
||||
let search_kind =
|
||||
search_kind(&query, index_scheduler, index_uid.to_string(), &index)?;
|
||||
|
||||
let canonicalization_kind = match (&search_kind, &query.q) {
|
||||
(SearchKind::SemanticOnly { .. }, _) => {
|
||||
ranking_rules::CanonicalizationKind::Vector
|
||||
}
|
||||
(_, Some(q)) if !q.is_empty() => ranking_rules::CanonicalizationKind::Keyword,
|
||||
_ => ranking_rules::CanonicalizationKind::Placeholder,
|
||||
};
|
||||
|
||||
let sort = if let Some(sort) = &query.sort {
|
||||
let sorts: Vec<_> =
|
||||
match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
|
||||
Ok(sorts) => sorts,
|
||||
Err(asc_desc_error) => {
|
||||
return Err(milli::Error::from(milli::SortError::from(
|
||||
asc_desc_error,
|
||||
))
|
||||
.into())
|
||||
}
|
||||
};
|
||||
Some(sorts)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let ranking_rules = ranking_rules::RankingRules::new(
|
||||
criteria.clone(),
|
||||
sort,
|
||||
query.matching_strategy.into(),
|
||||
canonicalization_kind,
|
||||
);
|
||||
|
||||
if let Some((previous_ranking_rules, previous_query_index, previous_index_uid)) =
|
||||
previous_query_data.take()
|
||||
{
|
||||
if let Err(error) = ranking_rules.is_compatible_with(&previous_ranking_rules) {
|
||||
return Err(error.to_response_error(
|
||||
&ranking_rules,
|
||||
&previous_ranking_rules,
|
||||
query_index,
|
||||
previous_query_index,
|
||||
&index_uid,
|
||||
&previous_index_uid,
|
||||
));
|
||||
}
|
||||
previous_query_data = if previous_ranking_rules.constraint_count()
|
||||
> ranking_rules.constraint_count()
|
||||
{
|
||||
Some((previous_ranking_rules, previous_query_index, previous_index_uid))
|
||||
} else {
|
||||
Some((ranking_rules, query_index, index_uid.clone()))
|
||||
};
|
||||
} else {
|
||||
previous_query_data = Some((ranking_rules, query_index, index_uid.clone()));
|
||||
}
|
||||
|
||||
match search_kind {
|
||||
SearchKind::KeywordOnly => {}
|
||||
_ => semantic_hit_count = Some(0),
|
||||
}
|
||||
|
||||
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
|
||||
|
||||
let time_budget = match cutoff {
|
||||
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
|
||||
None => TimeBudget::default(),
|
||||
};
|
||||
|
||||
let (mut search, _is_finite_pagination, _max_total_hits, _offset) =
|
||||
prepare_search(&index, &rtxn, &query, &search_kind, time_budget, features)?;
|
||||
|
||||
search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed);
|
||||
search.offset(0);
|
||||
search.limit(required_hit_count);
|
||||
|
||||
let (result, _semantic_hit_count) =
|
||||
super::search_from_kind(index_uid.to_string(), search_kind, search)?;
|
||||
let format = AttributesFormat {
|
||||
attributes_to_retrieve: query.attributes_to_retrieve,
|
||||
retrieve_vectors,
|
||||
attributes_to_highlight: query.attributes_to_highlight,
|
||||
attributes_to_crop: query.attributes_to_crop,
|
||||
crop_length: query.crop_length,
|
||||
crop_marker: query.crop_marker,
|
||||
highlight_pre_tag: query.highlight_pre_tag,
|
||||
highlight_post_tag: query.highlight_post_tag,
|
||||
show_matches_position: query.show_matches_position,
|
||||
sort: query.sort,
|
||||
show_ranking_score: query.show_ranking_score,
|
||||
show_ranking_score_details: query.show_ranking_score_details,
|
||||
locales: query.locales.map(|l| l.iter().copied().map(Into::into).collect()),
|
||||
};
|
||||
|
||||
let milli::SearchResult {
|
||||
matching_words,
|
||||
candidates: query_candidates,
|
||||
documents_ids,
|
||||
document_scores,
|
||||
degraded: query_degraded,
|
||||
used_negative_operator: query_used_negative_operator,
|
||||
} = result;
|
||||
|
||||
candidates |= query_candidates;
|
||||
degraded |= query_degraded;
|
||||
used_negative_operator |= query_used_negative_operator;
|
||||
|
||||
let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref());
|
||||
|
||||
let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
|
||||
|
||||
let hit_maker =
|
||||
HitMaker::new(&index, &rtxn, format, formatter_builder).map_err(|e| {
|
||||
MeilisearchHttpError::from_milli(e, Some(index_uid.to_string()))
|
||||
})?;
|
||||
|
||||
results_by_query.push(SearchResultByQuery {
|
||||
federation_options,
|
||||
hit_maker,
|
||||
query_index,
|
||||
documents_ids,
|
||||
document_scores,
|
||||
});
|
||||
Ok(())
|
||||
})();
|
||||
|
||||
if let Err(mut error) = res {
|
||||
error.message = format!("Inside `.queries[{query_index}]`: {}", error.message);
|
||||
return Err(error);
|
||||
}
|
||||
}
|
||||
// 2.2. merge inside index
|
||||
let mut documents_seen = RoaringBitmap::new();
|
||||
let merged_result: Result<Vec<_>, ResponseError> =
|
||||
merge_index_local_results(results_by_query)
|
||||
// skip documents we've already seen & mark that we saw the current document
|
||||
.filter(|SearchResultByQueryIterItem { docid, .. }| documents_seen.insert(*docid))
|
||||
.take(required_hit_count)
|
||||
// 2.3 make hits
|
||||
.map(
|
||||
|SearchResultByQueryIterItem {
|
||||
docid,
|
||||
score,
|
||||
federation_options,
|
||||
hit_maker,
|
||||
query_index,
|
||||
}| {
|
||||
let mut hit = hit_maker.make_hit(docid, &score)?;
|
||||
let weighted_score =
|
||||
ScoreDetails::global_score(score.iter()) * (*federation_options.weight);
|
||||
|
||||
let _federation = serde_json::json!(
|
||||
{
|
||||
"indexUid": index_uid,
|
||||
"queriesPosition": query_index,
|
||||
"weightedRankingScore": weighted_score,
|
||||
}
|
||||
);
|
||||
hit.document.insert("_federation".to_string(), _federation);
|
||||
Ok(SearchHitByIndex { hit, score, federation_options, query_index })
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
let merged_result = merged_result?;
|
||||
|
||||
let estimated_total_hits = candidates.len() as usize;
|
||||
|
||||
let facets = facets_by_index
|
||||
.map(|facets_by_index| {
|
||||
compute_facet_distribution_stats(
|
||||
&facets_by_index,
|
||||
&index,
|
||||
&rtxn,
|
||||
candidates,
|
||||
super::Route::MultiSearch,
|
||||
)
|
||||
})
|
||||
.transpose()
|
||||
.map_err(|mut error| {
|
||||
error.message = format!(
|
||||
"Inside `.federation.facetsByIndex.{index_uid}`: {}{}",
|
||||
error.message,
|
||||
if let Some(query_index) = first_query_index {
|
||||
format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`")
|
||||
} else {
|
||||
Default::default()
|
||||
}
|
||||
);
|
||||
error
|
||||
})?;
|
||||
|
||||
results_by_index.push(SearchResultByIndex {
|
||||
index: index_uid,
|
||||
hits: merged_result,
|
||||
estimated_total_hits,
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
facets,
|
||||
});
|
||||
}
|
||||
|
||||
// bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index.
|
||||
for (index_uid, facets) in federation.facets_by_index {
|
||||
let index = match index_scheduler.index(&index_uid) {
|
||||
Ok(index) => index,
|
||||
Err(err) => {
|
||||
let mut err = ResponseError::from(err);
|
||||
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
|
||||
// here the resource not found is not part of the URL.
|
||||
err.code = StatusCode::BAD_REQUEST;
|
||||
err.message = format!(
|
||||
"Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries",
|
||||
err.message
|
||||
);
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
|
||||
// Important: this is the only transaction we'll use for this index during this federated search
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
if let Err(mut error) =
|
||||
check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn)
|
||||
{
|
||||
error.message = format!(
|
||||
"Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries",
|
||||
);
|
||||
return Err(error);
|
||||
}
|
||||
|
||||
if let Some(facets) = facets {
|
||||
if let Err(mut error) = compute_facet_distribution_stats(
|
||||
&facets,
|
||||
&index,
|
||||
&rtxn,
|
||||
Default::default(),
|
||||
super::Route::MultiSearch,
|
||||
) {
|
||||
error.message =
|
||||
format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message);
|
||||
return Err(error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. merge hits and metadata across indexes
|
||||
// 3.1 merge metadata
|
||||
let (estimated_total_hits, degraded, used_negative_operator, facets) = {
|
||||
let mut estimated_total_hits = 0;
|
||||
let mut degraded = false;
|
||||
let mut used_negative_operator = false;
|
||||
|
||||
let mut facets: FederatedFacets = FederatedFacets::default();
|
||||
|
||||
for SearchResultByIndex {
|
||||
index,
|
||||
hits: _,
|
||||
estimated_total_hits: estimated_total_hits_by_index,
|
||||
facets: facets_by_index,
|
||||
degraded: degraded_by_index,
|
||||
used_negative_operator: used_negative_operator_by_index,
|
||||
} in &mut results_by_index
|
||||
{
|
||||
estimated_total_hits += *estimated_total_hits_by_index;
|
||||
degraded |= *degraded_by_index;
|
||||
used_negative_operator |= *used_negative_operator_by_index;
|
||||
|
||||
let facets_by_index = std::mem::take(facets_by_index);
|
||||
let index = std::mem::take(index);
|
||||
|
||||
facets.insert(index, facets_by_index);
|
||||
}
|
||||
|
||||
(estimated_total_hits, degraded, used_negative_operator, facets)
|
||||
};
|
||||
|
||||
// 3.2 merge hits
|
||||
let merged_hits: Vec<_> = merge_index_global_results(results_by_index)
|
||||
.skip(federation.offset)
|
||||
.take(federation.limit)
|
||||
.inspect(|hit| {
|
||||
if let Some(semantic_hit_count) = &mut semantic_hit_count {
|
||||
if hit.score.iter().any(|score| matches!(&score, ScoreDetails::Vector(_))) {
|
||||
*semantic_hit_count += 1;
|
||||
}
|
||||
}
|
||||
})
|
||||
.map(|hit| hit.hit)
|
||||
.collect();
|
||||
|
||||
let (facet_distribution, facet_stats, facets_by_index) =
|
||||
match federation.merge_facets.zip(facet_order) {
|
||||
Some((merge_facets, facet_order)) => {
|
||||
let facets = facets.merge(merge_facets, facet_order);
|
||||
|
||||
let (facet_distribution, facet_stats) = facets
|
||||
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
|
||||
.unzip();
|
||||
|
||||
(facet_distribution, facet_stats, FederatedFacets::default())
|
||||
}
|
||||
None => (None, None, facets),
|
||||
};
|
||||
|
||||
let search_result = FederatedSearchResult {
|
||||
hits: merged_hits,
|
||||
processing_time_ms: before_search.elapsed().as_millis(),
|
||||
hits_info: HitsInfo::OffsetLimit {
|
||||
limit: federation.limit,
|
||||
offset: federation.offset,
|
||||
estimated_total_hits,
|
||||
},
|
||||
semantic_hit_count,
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
facet_distribution,
|
||||
facet_stats,
|
||||
facets_by_index,
|
||||
};
|
||||
|
||||
Ok(search_result)
|
||||
}
|
||||
|
||||
fn check_facet_order(
|
||||
facet_order: &mut Option<BTreeMap<String, (String, OrderBy)>>,
|
||||
current_index: &str,
|
||||
facets_by_index: &Option<Vec<String>>,
|
||||
index: &milli::Index,
|
||||
rtxn: &milli::heed::RoTxn<'_>,
|
||||
) -> Result<(), ResponseError> {
|
||||
if let (Some(facet_order), Some(facets_by_index)) = (facet_order, facets_by_index) {
|
||||
let index_facet_order = index.sort_facet_values_by(rtxn)?;
|
||||
for facet in facets_by_index {
|
||||
let index_facet_order = index_facet_order.get(facet);
|
||||
let (previous_index, previous_facet_order) = facet_order
|
||||
.entry(facet.to_owned())
|
||||
.or_insert_with(|| (current_index.to_owned(), index_facet_order));
|
||||
if previous_facet_order != &index_facet_order {
|
||||
return Err(MeilisearchHttpError::InconsistentFacetOrder {
|
||||
facet: facet.clone(),
|
||||
previous_facet_order: *previous_facet_order,
|
||||
previous_uid: previous_index.clone(),
|
||||
current_uid: current_index.to_owned(),
|
||||
index_facet_order,
|
||||
}
|
||||
.into());
|
||||
}
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
}
|
10
crates/meilisearch/src/search/federated/mod.rs
Normal file
10
crates/meilisearch/src/search/federated/mod.rs
Normal file
@ -0,0 +1,10 @@
|
||||
mod perform;
|
||||
mod proxy;
|
||||
mod types;
|
||||
mod weighted_scores;
|
||||
|
||||
pub use perform::perform_federated_search;
|
||||
pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE};
|
||||
pub use types::{
|
||||
FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
|
||||
};
|
1112
crates/meilisearch/src/search/federated/perform.rs
Normal file
1112
crates/meilisearch/src/search/federated/perform.rs
Normal file
File diff suppressed because it is too large
Load Diff
267
crates/meilisearch/src/search/federated/proxy.rs
Normal file
267
crates/meilisearch/src/search/federated/proxy.rs
Normal file
@ -0,0 +1,267 @@
|
||||
pub use error::ProxySearchError;
|
||||
use error::ReqwestErrorWithoutUrl;
|
||||
use meilisearch_types::features::Remote;
|
||||
use rand::Rng as _;
|
||||
use reqwest::{Client, Response, StatusCode};
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::types::{FederatedSearch, FederatedSearchResult, Federation};
|
||||
use crate::search::SearchQueryWithIndex;
|
||||
|
||||
pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search";
|
||||
pub const PROXY_SEARCH_HEADER_VALUE: &str = "true";
|
||||
|
||||
mod error {
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use reqwest::StatusCode;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ProxySearchError {
|
||||
#[error("{0}")]
|
||||
CouldNotSendRequest(ReqwestErrorWithoutUrl),
|
||||
#[error("could not authenticate against the remote host\n - hint: check that the remote instance was registered with a valid API key having the `search` action")]
|
||||
AuthenticationError,
|
||||
#[error(
|
||||
"could not parse response from the remote host as a federated search response{}\n - hint: check that the remote instance is a Meilisearch instance running the same version",
|
||||
response_from_remote(response)
|
||||
)]
|
||||
CouldNotParseResponse { response: Result<String, ReqwestErrorWithoutUrl> },
|
||||
#[error("remote host responded with code {}{}\n - hint: check that the remote instance has the correct index configuration for that request\n - hint: check that the `network` experimental feature is enabled on the remote instance", status_code.as_u16(), response_from_remote(response))]
|
||||
BadRequest { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> },
|
||||
#[error("remote host did not answer before the deadline")]
|
||||
Timeout,
|
||||
#[error("remote hit does not contain `{0}`\n - hint: check that the remote instance is a Meilisearch instance running the same version")]
|
||||
MissingPathInResponse(&'static str),
|
||||
#[error("remote host responded with code {}{}", status_code.as_u16(), response_from_remote(response))]
|
||||
RemoteError { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> },
|
||||
#[error("remote hit contains an unexpected value at path `{path}`: expected {expected_type}, received `{received_value}`\n - hint: check that the remote instance is a Meilisearch instance running the same version")]
|
||||
UnexpectedValueInPath {
|
||||
path: &'static str,
|
||||
expected_type: &'static str,
|
||||
received_value: String,
|
||||
},
|
||||
#[error("could not parse weighted score values in the remote hit: {0}")]
|
||||
CouldNotParseWeightedScoreValues(serde_json::Error),
|
||||
}
|
||||
|
||||
impl ProxySearchError {
|
||||
pub fn as_response_error(&self) -> ResponseError {
|
||||
use meilisearch_types::error::Code;
|
||||
let message = self.to_string();
|
||||
let code = match self {
|
||||
ProxySearchError::CouldNotSendRequest(_) => Code::RemoteCouldNotSendRequest,
|
||||
ProxySearchError::AuthenticationError => Code::RemoteInvalidApiKey,
|
||||
ProxySearchError::BadRequest { .. } => Code::RemoteBadRequest,
|
||||
ProxySearchError::Timeout => Code::RemoteTimeout,
|
||||
ProxySearchError::RemoteError { .. } => Code::RemoteRemoteError,
|
||||
ProxySearchError::CouldNotParseResponse { .. }
|
||||
| ProxySearchError::MissingPathInResponse(_)
|
||||
| ProxySearchError::UnexpectedValueInPath { .. }
|
||||
| ProxySearchError::CouldNotParseWeightedScoreValues(_) => Code::RemoteBadResponse,
|
||||
};
|
||||
ResponseError::from_msg(message, code)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[error(transparent)]
|
||||
pub struct ReqwestErrorWithoutUrl(reqwest::Error);
|
||||
impl ReqwestErrorWithoutUrl {
|
||||
pub fn new(inner: reqwest::Error) -> Self {
|
||||
Self(inner.without_url())
|
||||
}
|
||||
}
|
||||
|
||||
fn response_from_remote(response: &Result<String, ReqwestErrorWithoutUrl>) -> String {
|
||||
match response {
|
||||
Ok(response) => {
|
||||
format!(":\n - response from remote: {}", response)
|
||||
}
|
||||
Err(error) => {
|
||||
format!(":\n - additionally, could not retrieve response from remote: {error}")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ProxySearchParams {
|
||||
pub deadline: Option<std::time::Instant>,
|
||||
pub try_count: u32,
|
||||
pub client: reqwest::Client,
|
||||
}
|
||||
|
||||
/// Performs a federated search on a remote host and returns the results
|
||||
pub async fn proxy_search(
|
||||
node: &Remote,
|
||||
queries: Vec<SearchQueryWithIndex>,
|
||||
federation: Federation,
|
||||
params: &ProxySearchParams,
|
||||
) -> Result<FederatedSearchResult, ProxySearchError> {
|
||||
let url = format!("{}/multi-search", node.url);
|
||||
|
||||
let federated = FederatedSearch { queries, federation: Some(federation) };
|
||||
|
||||
let search_api_key = node.search_api_key.as_deref();
|
||||
|
||||
let max_deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
|
||||
|
||||
let deadline = if let Some(deadline) = params.deadline {
|
||||
std::time::Instant::min(deadline, max_deadline)
|
||||
} else {
|
||||
max_deadline
|
||||
};
|
||||
|
||||
for i in 0..params.try_count {
|
||||
match try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline).await {
|
||||
Ok(response) => return Ok(response),
|
||||
Err(retry) => {
|
||||
let duration = retry.into_duration(i)?;
|
||||
tokio::time::sleep(duration).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline)
|
||||
.await
|
||||
.map_err(Retry::into_error)
|
||||
}
|
||||
|
||||
async fn try_proxy_search(
|
||||
url: &str,
|
||||
search_api_key: Option<&str>,
|
||||
federated: &FederatedSearch,
|
||||
client: &Client,
|
||||
deadline: std::time::Instant,
|
||||
) -> Result<FederatedSearchResult, Retry> {
|
||||
let timeout = deadline.saturating_duration_since(std::time::Instant::now());
|
||||
|
||||
let request = client.post(url).json(&federated).timeout(timeout);
|
||||
let request = if let Some(search_api_key) = search_api_key {
|
||||
request.bearer_auth(search_api_key)
|
||||
} else {
|
||||
request
|
||||
};
|
||||
let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE);
|
||||
|
||||
let response = request.send().await;
|
||||
let response = match response {
|
||||
Ok(response) => response,
|
||||
Err(error) if error.is_timeout() => return Err(Retry::give_up(ProxySearchError::Timeout)),
|
||||
Err(error) => {
|
||||
return Err(Retry::retry_later(ProxySearchError::CouldNotSendRequest(
|
||||
ReqwestErrorWithoutUrl::new(error),
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
match response.status() {
|
||||
status_code if status_code.is_success() => (),
|
||||
StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
|
||||
return Err(Retry::give_up(ProxySearchError::AuthenticationError))
|
||||
}
|
||||
status_code if status_code.is_client_error() => {
|
||||
let response = parse_error(response).await;
|
||||
return Err(Retry::give_up(ProxySearchError::BadRequest { status_code, response }));
|
||||
}
|
||||
status_code if status_code.is_server_error() => {
|
||||
let response = parse_error(response).await;
|
||||
return Err(Retry::retry_later(ProxySearchError::RemoteError {
|
||||
status_code,
|
||||
response,
|
||||
}));
|
||||
}
|
||||
status_code => {
|
||||
tracing::warn!(
|
||||
status_code = status_code.as_u16(),
|
||||
"remote replied with unexpected status code"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let response = match parse_response(response).await {
|
||||
Ok(response) => response,
|
||||
Err(response) => {
|
||||
return Err(Retry::retry_later(ProxySearchError::CouldNotParseResponse { response }))
|
||||
}
|
||||
};
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
/// Always parse the body of the response of a failed request as JSON.
|
||||
async fn parse_error(response: Response) -> Result<String, ReqwestErrorWithoutUrl> {
|
||||
let bytes = match response.bytes().await {
|
||||
Ok(bytes) => bytes,
|
||||
Err(error) => return Err(ReqwestErrorWithoutUrl::new(error)),
|
||||
};
|
||||
|
||||
Ok(parse_bytes_as_error(&bytes))
|
||||
}
|
||||
|
||||
fn parse_bytes_as_error(bytes: &[u8]) -> String {
|
||||
match serde_json::from_slice::<Value>(bytes) {
|
||||
Ok(value) => value.to_string(),
|
||||
Err(_) => String::from_utf8_lossy(bytes).into_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn parse_response<T: DeserializeOwned>(
|
||||
response: Response,
|
||||
) -> Result<T, Result<String, ReqwestErrorWithoutUrl>> {
|
||||
let bytes = match response.bytes().await {
|
||||
Ok(bytes) => bytes,
|
||||
Err(error) => return Err(Err(ReqwestErrorWithoutUrl::new(error))),
|
||||
};
|
||||
|
||||
match serde_json::from_slice::<T>(&bytes) {
|
||||
Ok(value) => Ok(value),
|
||||
Err(_) => Err(Ok(parse_bytes_as_error(&bytes))),
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Retry {
|
||||
error: ProxySearchError,
|
||||
strategy: RetryStrategy,
|
||||
}
|
||||
|
||||
pub enum RetryStrategy {
|
||||
GiveUp,
|
||||
Retry,
|
||||
}
|
||||
|
||||
impl Retry {
|
||||
pub fn give_up(error: ProxySearchError) -> Self {
|
||||
Self { error, strategy: RetryStrategy::GiveUp }
|
||||
}
|
||||
|
||||
pub fn retry_later(error: ProxySearchError) -> Self {
|
||||
Self { error, strategy: RetryStrategy::Retry }
|
||||
}
|
||||
|
||||
pub fn into_duration(self, attempt: u32) -> Result<std::time::Duration, ProxySearchError> {
|
||||
match self.strategy {
|
||||
RetryStrategy::GiveUp => Err(self.error),
|
||||
RetryStrategy::Retry => {
|
||||
let retry_duration = std::time::Duration::from_nanos((10u64).pow(attempt));
|
||||
let retry_duration = retry_duration.min(std::time::Duration::from_millis(100)); // don't wait more than 100ms
|
||||
|
||||
// randomly up to double the retry duration
|
||||
let retry_duration = retry_duration
|
||||
+ rand::thread_rng().gen_range(std::time::Duration::ZERO..retry_duration);
|
||||
|
||||
tracing::warn!(
|
||||
"Attempt #{}, failed with {}, retrying after {}ms.",
|
||||
attempt,
|
||||
self.error,
|
||||
retry_duration.as_millis()
|
||||
);
|
||||
Ok(retry_duration)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_error(self) -> ProxySearchError {
|
||||
self.error
|
||||
}
|
||||
}
|
322
crates/meilisearch/src/search/federated/types.rs
Normal file
322
crates/meilisearch/src/search/federated/types.rs
Normal file
@ -0,0 +1,322 @@
|
||||
use std::collections::btree_map::Entry;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt;
|
||||
use std::vec::Vec;
|
||||
|
||||
use indexmap::IndexMap;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::{
|
||||
InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet,
|
||||
InvalidMultiSearchMergeFacets, InvalidMultiSearchQueryPosition, InvalidMultiSearchRemote,
|
||||
InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset,
|
||||
};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::order_by_map::OrderByMap;
|
||||
use meilisearch_types::milli::OrderBy;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utoipa::ToSchema;
|
||||
|
||||
use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex};
|
||||
|
||||
pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;
|
||||
|
||||
// fields in the response
|
||||
pub const FEDERATION_HIT: &str = "_federation";
|
||||
pub const INDEX_UID: &str = "indexUid";
|
||||
pub const QUERIES_POSITION: &str = "queriesPosition";
|
||||
pub const WEIGHTED_RANKING_SCORE: &str = "weightedRankingScore";
|
||||
pub const WEIGHTED_SCORE_VALUES: &str = "weightedScoreValues";
|
||||
pub const FEDERATION_REMOTE: &str = "remote";
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Serialize, deserr::Deserr, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
|
||||
pub struct FederationOptions {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)]
|
||||
#[schema(value_type = f64)]
|
||||
pub weight: Weight,
|
||||
|
||||
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchRemote>)]
|
||||
pub remote: Option<String>,
|
||||
|
||||
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchQueryPosition>)]
|
||||
pub query_position: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Serialize, deserr::Deserr)]
|
||||
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)]
|
||||
pub struct Weight(f64);
|
||||
|
||||
impl Default for Weight {
|
||||
fn default() -> Self {
|
||||
Weight(DEFAULT_FEDERATED_WEIGHT)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::convert::TryFrom<f64> for Weight {
|
||||
type Error = InvalidMultiSearchWeight;
|
||||
|
||||
fn try_from(f: f64) -> Result<Self, Self::Error> {
|
||||
if f < 0.0 {
|
||||
Err(InvalidMultiSearchWeight)
|
||||
} else {
|
||||
Ok(Weight(f))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for Weight {
|
||||
type Target = f64;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, deserr::Deserr, Serialize, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Federation {
|
||||
#[deserr(default = super::super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
|
||||
pub limit: usize,
|
||||
#[deserr(default = super::super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
|
||||
pub offset: usize,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchFacetsByIndex>)]
|
||||
pub facets_by_index: BTreeMap<IndexUid, Option<Vec<String>>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeFacets>)]
|
||||
pub merge_facets: Option<MergeFacets>,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, deserr::Deserr, Serialize, Default, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError<InvalidMultiSearchMergeFacets>, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct MergeFacets {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMaxValuesPerFacet>)]
|
||||
pub max_values_per_facet: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, deserr::Deserr, Serialize, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct FederatedSearch {
|
||||
pub queries: Vec<SearchQueryWithIndex>,
|
||||
#[deserr(default)]
|
||||
pub federation: Option<Federation>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct FederatedSearchResult {
|
||||
pub hits: Vec<SearchHit>,
|
||||
pub processing_time_ms: u128,
|
||||
#[serde(flatten)]
|
||||
pub hits_info: HitsInfo,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub semantic_hit_count: Option<u32>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
#[schema(value_type = Option<BTreeMap<String, BTreeMap<String, u64>>>)]
|
||||
pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
|
||||
#[serde(default, skip_serializing_if = "FederatedFacets::is_empty")]
|
||||
pub facets_by_index: FederatedFacets,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub remote_errors: Option<BTreeMap<String, ResponseError>>,
|
||||
|
||||
// These fields are only used for analytics purposes
|
||||
#[serde(skip)]
|
||||
pub degraded: bool,
|
||||
#[serde(skip)]
|
||||
pub used_negative_operator: bool,
|
||||
}
|
||||
|
||||
impl fmt::Debug for FederatedSearchResult {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let FederatedSearchResult {
|
||||
hits,
|
||||
processing_time_ms,
|
||||
hits_info,
|
||||
semantic_hit_count,
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
facet_distribution,
|
||||
facet_stats,
|
||||
facets_by_index,
|
||||
remote_errors,
|
||||
} = self;
|
||||
|
||||
let mut debug = f.debug_struct("SearchResult");
|
||||
// The most important thing when looking at a search result is the time it took to process
|
||||
debug.field("processing_time_ms", &processing_time_ms);
|
||||
debug.field("hits", &format!("[{} hits returned]", hits.len()));
|
||||
debug.field("hits_info", &hits_info);
|
||||
if *used_negative_operator {
|
||||
debug.field("used_negative_operator", used_negative_operator);
|
||||
}
|
||||
if *degraded {
|
||||
debug.field("degraded", degraded);
|
||||
}
|
||||
if let Some(facet_distribution) = facet_distribution {
|
||||
debug.field("facet_distribution", &facet_distribution);
|
||||
}
|
||||
if let Some(facet_stats) = facet_stats {
|
||||
debug.field("facet_stats", &facet_stats);
|
||||
}
|
||||
if let Some(semantic_hit_count) = semantic_hit_count {
|
||||
debug.field("semantic_hit_count", &semantic_hit_count);
|
||||
}
|
||||
if !facets_by_index.is_empty() {
|
||||
debug.field("facets_by_index", &facets_by_index);
|
||||
}
|
||||
if let Some(remote_errors) = remote_errors {
|
||||
debug.field("remote_errors", &remote_errors);
|
||||
}
|
||||
|
||||
debug.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
|
||||
pub struct FederatedFacets(pub BTreeMap<String, ComputedFacets>);
|
||||
|
||||
impl FederatedFacets {
|
||||
pub fn insert(&mut self, index: String, facets: Option<ComputedFacets>) {
|
||||
if let Some(facets) = facets {
|
||||
self.0.insert(index, facets);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.0.is_empty()
|
||||
}
|
||||
|
||||
pub fn merge(
|
||||
self,
|
||||
MergeFacets { max_values_per_facet }: MergeFacets,
|
||||
facet_order: BTreeMap<String, (String, OrderBy)>,
|
||||
) -> Option<ComputedFacets> {
|
||||
if self.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut distribution: BTreeMap<String, _> = Default::default();
|
||||
let mut stats: BTreeMap<String, FacetStats> = Default::default();
|
||||
|
||||
for facets_by_index in self.0.into_values() {
|
||||
for (facet, index_distribution) in facets_by_index.distribution {
|
||||
match distribution.entry(facet) {
|
||||
Entry::Vacant(entry) => {
|
||||
entry.insert(index_distribution);
|
||||
}
|
||||
Entry::Occupied(mut entry) => {
|
||||
let distribution = entry.get_mut();
|
||||
|
||||
for (value, index_count) in index_distribution {
|
||||
distribution
|
||||
.entry(value)
|
||||
.and_modify(|count| *count += index_count)
|
||||
.or_insert(index_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (facet, index_stats) in facets_by_index.stats {
|
||||
match stats.entry(facet) {
|
||||
Entry::Vacant(entry) => {
|
||||
entry.insert(index_stats);
|
||||
}
|
||||
Entry::Occupied(mut entry) => {
|
||||
let stats = entry.get_mut();
|
||||
|
||||
stats.min = f64::min(stats.min, index_stats.min);
|
||||
stats.max = f64::max(stats.max, index_stats.max);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fixup order
|
||||
for (facet, values) in &mut distribution {
|
||||
let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default();
|
||||
|
||||
match order_by {
|
||||
OrderBy::Lexicographic => {
|
||||
values.sort_unstable_by(|left, _, right, _| left.cmp(right))
|
||||
}
|
||||
OrderBy::Count => {
|
||||
values.sort_unstable_by(|_, left, _, right| {
|
||||
left.cmp(right)
|
||||
// biggest first
|
||||
.reverse()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(max_values_per_facet) = max_values_per_facet {
|
||||
values.truncate(max_values_per_facet)
|
||||
};
|
||||
}
|
||||
|
||||
Some(ComputedFacets { distribution, stats })
|
||||
}
|
||||
|
||||
pub(crate) fn append(&mut self, FederatedFacets(remote_facets_by_index): FederatedFacets) {
|
||||
for (index, remote_facets) in remote_facets_by_index {
|
||||
let merged_facets = self.0.entry(index).or_default();
|
||||
|
||||
for (remote_facet, remote_stats) in remote_facets.stats {
|
||||
match merged_facets.stats.entry(remote_facet) {
|
||||
Entry::Vacant(vacant_entry) => {
|
||||
vacant_entry.insert(remote_stats);
|
||||
}
|
||||
Entry::Occupied(mut occupied_entry) => {
|
||||
let stats = occupied_entry.get_mut();
|
||||
stats.min = f64::min(stats.min, remote_stats.min);
|
||||
stats.max = f64::max(stats.max, remote_stats.max);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (remote_facet, remote_values) in remote_facets.distribution {
|
||||
let merged_facet = merged_facets.distribution.entry(remote_facet).or_default();
|
||||
for (remote_value, remote_count) in remote_values {
|
||||
let count = merged_facet.entry(remote_value).or_default();
|
||||
*count += remote_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn sort_and_truncate(&mut self, facet_order: BTreeMap<String, (OrderByMap, usize)>) {
|
||||
for (index, facets) in &mut self.0 {
|
||||
let Some((order_by, max_values_per_facet)) = facet_order.get(index) else {
|
||||
continue;
|
||||
};
|
||||
for (facet, values) in &mut facets.distribution {
|
||||
match order_by.get(facet) {
|
||||
OrderBy::Lexicographic => {
|
||||
values.sort_unstable_by(|left, _, right, _| left.cmp(right))
|
||||
}
|
||||
OrderBy::Count => {
|
||||
values.sort_unstable_by(|_, left, _, right| {
|
||||
left.cmp(right)
|
||||
// biggest first
|
||||
.reverse()
|
||||
})
|
||||
}
|
||||
}
|
||||
values.truncate(*max_values_per_facet);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
88
crates/meilisearch/src/search/federated/weighted_scores.rs
Normal file
88
crates/meilisearch/src/search/federated/weighted_scores.rs
Normal file
@ -0,0 +1,88 @@
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use meilisearch_types::milli::score_details::{self, WeightedScoreValue};
|
||||
|
||||
pub fn compare(
|
||||
mut left_it: impl Iterator<Item = WeightedScoreValue>,
|
||||
left_weighted_global_score: f64,
|
||||
mut right_it: impl Iterator<Item = WeightedScoreValue>,
|
||||
right_weighted_global_score: f64,
|
||||
) -> Ordering {
|
||||
loop {
|
||||
let left = left_it.next();
|
||||
let right = right_it.next();
|
||||
|
||||
match (left, right) {
|
||||
(None, None) => return Ordering::Equal,
|
||||
(None, Some(_)) => return Ordering::Less,
|
||||
(Some(_), None) => return Ordering::Greater,
|
||||
(
|
||||
Some(
|
||||
WeightedScoreValue::WeightedScore(left) | WeightedScoreValue::VectorSort(left),
|
||||
),
|
||||
Some(
|
||||
WeightedScoreValue::WeightedScore(right)
|
||||
| WeightedScoreValue::VectorSort(right),
|
||||
),
|
||||
) => {
|
||||
if (left - right).abs() <= f64::EPSILON {
|
||||
continue;
|
||||
}
|
||||
return left.partial_cmp(&right).unwrap();
|
||||
}
|
||||
(
|
||||
Some(WeightedScoreValue::Sort { asc: left_asc, value: left }),
|
||||
Some(WeightedScoreValue::Sort { asc: right_asc, value: right }),
|
||||
) => {
|
||||
if left_asc != right_asc {
|
||||
return left_weighted_global_score
|
||||
.partial_cmp(&right_weighted_global_score)
|
||||
.unwrap();
|
||||
}
|
||||
match score_details::compare_sort_values(left_asc, &left, &right) {
|
||||
Ordering::Equal => continue,
|
||||
order => return order,
|
||||
}
|
||||
}
|
||||
(
|
||||
Some(WeightedScoreValue::GeoSort { asc: left_asc, distance: left }),
|
||||
Some(WeightedScoreValue::GeoSort { asc: right_asc, distance: right }),
|
||||
) => {
|
||||
if left_asc != right_asc {
|
||||
continue;
|
||||
}
|
||||
match (left, right) {
|
||||
(None, None) => continue,
|
||||
(None, Some(_)) => return Ordering::Less,
|
||||
(Some(_), None) => return Ordering::Greater,
|
||||
(Some(left), Some(right)) => {
|
||||
if (left - right).abs() <= f64::EPSILON {
|
||||
continue;
|
||||
}
|
||||
return left.partial_cmp(&right).unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
// not comparable details, use global
|
||||
(Some(WeightedScoreValue::WeightedScore(_)), Some(_))
|
||||
| (Some(_), Some(WeightedScoreValue::WeightedScore(_)))
|
||||
| (Some(WeightedScoreValue::VectorSort(_)), Some(_))
|
||||
| (Some(_), Some(WeightedScoreValue::VectorSort(_)))
|
||||
| (Some(WeightedScoreValue::GeoSort { .. }), Some(WeightedScoreValue::Sort { .. }))
|
||||
| (Some(WeightedScoreValue::Sort { .. }), Some(WeightedScoreValue::GeoSort { .. })) => {
|
||||
let left_count = left_it.count();
|
||||
let right_count = right_it.count();
|
||||
// compare how many remaining groups of rules each side has.
|
||||
// the group with the most remaining groups wins.
|
||||
return left_count
|
||||
.cmp(&right_count)
|
||||
// breaks ties with the global ranking score
|
||||
.then_with(|| {
|
||||
left_weighted_global_score
|
||||
.partial_cmp(&right_weighted_global_score)
|
||||
.unwrap()
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -30,7 +30,7 @@ use milli::{
|
||||
MatchBounds, MatcherBuilder, SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
||||
};
|
||||
use regex::Regex;
|
||||
use serde::Serialize;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Value};
|
||||
#[cfg(test)]
|
||||
mod mod_test;
|
||||
@ -41,7 +41,7 @@ use crate::error::MeilisearchHttpError;
|
||||
mod federated;
|
||||
pub use federated::{
|
||||
perform_federated_search, FederatedSearch, FederatedSearchResult, Federation,
|
||||
FederationOptions, MergeFacets,
|
||||
FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
|
||||
};
|
||||
|
||||
mod ranking_rules;
|
||||
@ -119,7 +119,7 @@ pub struct SearchQuery {
|
||||
pub locales: Option<Vec<Locale>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Deserr, ToSchema)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Deserr, ToSchema, Serialize)]
|
||||
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
|
||||
pub struct RankingScoreThreshold(f64);
|
||||
impl std::convert::TryFrom<f64> for RankingScoreThreshold {
|
||||
@ -275,11 +275,13 @@ impl fmt::Debug for SearchQuery {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, PartialEq, Deserr, ToSchema)]
|
||||
#[derive(Debug, Clone, Default, PartialEq, Deserr, ToSchema, Serialize)]
|
||||
#[deserr(error = DeserrJsonError<InvalidSearchHybridQuery>, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct HybridQuery {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)]
|
||||
#[schema(value_type = f32, default)]
|
||||
#[serde(default)]
|
||||
pub semantic_ratio: SemanticRatio,
|
||||
#[deserr(error = DeserrJsonError<InvalidSearchEmbedder>)]
|
||||
pub embedder: String,
|
||||
@ -369,7 +371,7 @@ impl SearchKind {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Deserr, Serialize)]
|
||||
#[deserr(try_from(f32) = TryFrom::try_from -> InvalidSearchSemanticRatio)]
|
||||
pub struct SemanticRatio(f32);
|
||||
|
||||
@ -411,8 +413,9 @@ impl SearchQuery {
|
||||
// This struct contains the fields of `SearchQuery` inline.
|
||||
// This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields.
|
||||
// The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
|
||||
#[derive(Debug, Clone, PartialEq, Deserr, ToSchema)]
|
||||
#[derive(Debug, Clone, Serialize, PartialEq, Deserr, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct SearchQueryWithIndex {
|
||||
#[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)]
|
||||
@ -493,6 +496,72 @@ impl SearchQueryWithIndex {
|
||||
self.facets.as_deref().filter(|v| !v.is_empty())
|
||||
}
|
||||
|
||||
pub fn from_index_query_federation(
|
||||
index_uid: IndexUid,
|
||||
query: SearchQuery,
|
||||
federation_options: Option<FederationOptions>,
|
||||
) -> Self {
|
||||
let SearchQuery {
|
||||
q,
|
||||
vector,
|
||||
hybrid,
|
||||
offset,
|
||||
limit,
|
||||
page,
|
||||
hits_per_page,
|
||||
attributes_to_retrieve,
|
||||
retrieve_vectors,
|
||||
attributes_to_crop,
|
||||
crop_length,
|
||||
attributes_to_highlight,
|
||||
show_matches_position,
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
filter,
|
||||
sort,
|
||||
distinct,
|
||||
facets,
|
||||
highlight_pre_tag,
|
||||
highlight_post_tag,
|
||||
crop_marker,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
} = query;
|
||||
|
||||
SearchQueryWithIndex {
|
||||
index_uid,
|
||||
q,
|
||||
vector,
|
||||
hybrid,
|
||||
offset: if offset == DEFAULT_SEARCH_OFFSET() { None } else { Some(offset) },
|
||||
limit: if limit == DEFAULT_SEARCH_LIMIT() { None } else { Some(limit) },
|
||||
page,
|
||||
hits_per_page,
|
||||
attributes_to_retrieve,
|
||||
retrieve_vectors,
|
||||
attributes_to_crop,
|
||||
crop_length,
|
||||
attributes_to_highlight,
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
show_matches_position,
|
||||
filter,
|
||||
sort,
|
||||
distinct,
|
||||
facets,
|
||||
highlight_pre_tag,
|
||||
highlight_post_tag,
|
||||
crop_marker,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
federation_options,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) {
|
||||
let SearchQueryWithIndex {
|
||||
index_uid,
|
||||
@ -620,8 +689,9 @@ impl TryFrom<Value> for ExternalDocumentId {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, ToSchema)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, ToSchema, Serialize)]
|
||||
#[deserr(rename_all = camelCase)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum MatchingStrategy {
|
||||
/// Remove query words from last to first
|
||||
Last,
|
||||
@ -667,19 +737,19 @@ impl From<FacetValuesSort> for OrderBy {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, PartialEq, ToSchema)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)]
|
||||
pub struct SearchHit {
|
||||
#[serde(flatten)]
|
||||
#[schema(additional_properties, inline, value_type = HashMap<String, Value>)]
|
||||
pub document: Document,
|
||||
#[serde(rename = "_formatted", skip_serializing_if = "Document::is_empty")]
|
||||
#[serde(default, rename = "_formatted", skip_serializing_if = "Document::is_empty")]
|
||||
#[schema(additional_properties, value_type = HashMap<String, Value>)]
|
||||
pub formatted: Document,
|
||||
#[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
|
||||
#[serde(default, rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
|
||||
pub matches_position: Option<MatchesPosition>,
|
||||
#[serde(rename = "_rankingScore", skip_serializing_if = "Option::is_none")]
|
||||
#[serde(default, rename = "_rankingScore", skip_serializing_if = "Option::is_none")]
|
||||
pub ranking_score: Option<f64>,
|
||||
#[serde(rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")]
|
||||
#[serde(default, rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")]
|
||||
pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>,
|
||||
}
|
||||
|
||||
@ -767,7 +837,7 @@ pub struct SearchResultWithIndex {
|
||||
pub result: SearchResult,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug, Clone, PartialEq, Eq, ToSchema)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, ToSchema)]
|
||||
#[serde(untagged)]
|
||||
pub enum HitsInfo {
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@ -778,7 +848,7 @@ pub enum HitsInfo {
|
||||
OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug, Clone, PartialEq, ToSchema)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)]
|
||||
pub struct FacetStats {
|
||||
pub min: f64,
|
||||
pub max: f64,
|
||||
@ -1061,7 +1131,7 @@ pub fn perform_search(
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, ToSchema)]
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ComputedFacets {
|
||||
#[schema(value_type = BTreeMap<String, BTreeMap<String, u64>>)]
|
||||
pub distribution: BTreeMap<String, IndexMap<String, u64>>,
|
||||
|
Binary file not shown.
@ -421,7 +421,7 @@ async fn error_add_api_key_invalid_parameters_actions() {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###"
|
||||
{
|
||||
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`",
|
||||
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `network.get`, `network.update`",
|
||||
"code": "invalid_api_key_actions",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
|
||||
|
@ -68,6 +68,8 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'
|
||||
("GET", "/keys") => hashset!{"keys.get", "*"},
|
||||
("GET", "/experimental-features") => hashset!{"experimental.get", "*"},
|
||||
("PATCH", "/experimental-features") => hashset!{"experimental.update", "*"},
|
||||
("GET", "/network") => hashset!{"network.get", "*"},
|
||||
("PATCH", "/network") => hashset!{"network.update", "*"},
|
||||
};
|
||||
|
||||
authorizations
|
||||
|
@ -93,7 +93,7 @@ async fn create_api_key_bad_actions() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`",
|
||||
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `network.get`, `network.update`",
|
||||
"code": "invalid_api_key_actions",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
|
||||
|
@ -41,9 +41,8 @@ async fn list_batches() {
|
||||
let index = server.index("test");
|
||||
let (task, _status_code) = index.create(None).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
index
|
||||
.add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None)
|
||||
.await;
|
||||
let (task, _status_code) = index.create(None).await;
|
||||
index.wait_task(task.uid()).await.failed();
|
||||
let (response, code) = index.list_batches().await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(
|
||||
@ -96,11 +95,12 @@ async fn list_batches_pagination_and_reverse() {
|
||||
async fn list_batches_with_star_filters() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
let (batch, _code) = index.create(None).await;
|
||||
index.wait_task(batch.uid()).await.succeeded();
|
||||
index
|
||||
.add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None)
|
||||
.await;
|
||||
let (task, _code) = index.create(None).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
let index = server.index("test");
|
||||
let (task, _code) = index.create(None).await;
|
||||
index.wait_task(task.uid()).await.failed();
|
||||
|
||||
let (response, code) = index.service.get("/batches?indexUids=test").await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["results"].as_array().unwrap().len(), 2);
|
||||
@ -187,9 +187,6 @@ async fn list_batches_invalid_canceled_by_filter() {
|
||||
let index = server.index("test");
|
||||
let (task, _status_code) = index.create(None).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
index
|
||||
.add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None)
|
||||
.await;
|
||||
|
||||
let (response, code) = index.filtered_batches(&[], &[], &["0"]).await;
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
@ -202,9 +199,8 @@ async fn list_batches_status_and_type_filtered() {
|
||||
let index = server.index("test");
|
||||
let (task, _status_code) = index.create(None).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
index
|
||||
.add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None)
|
||||
.await;
|
||||
let (task, _status_code) = index.update(Some("id")).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let (response, code) = index.filtered_batches(&["indexCreation"], &["failed"], &[]).await;
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
@ -212,7 +208,7 @@ async fn list_batches_status_and_type_filtered() {
|
||||
|
||||
let (response, code) = index
|
||||
.filtered_batches(
|
||||
&["indexCreation", "documentAdditionOrUpdate"],
|
||||
&["indexCreation", "IndexUpdate"],
|
||||
&["succeeded", "processing", "enqueued"],
|
||||
&[],
|
||||
)
|
||||
|
@ -88,6 +88,10 @@ impl Server<Owned> {
|
||||
self.service.api_key = Some(api_key.as_ref().to_string());
|
||||
}
|
||||
|
||||
pub fn clear_api_key(&mut self) {
|
||||
self.service.api_key = None;
|
||||
}
|
||||
|
||||
/// Fetch and use the default admin key for nexts http requests.
|
||||
pub async fn use_admin_key(&mut self, master_key: impl AsRef<str>) {
|
||||
self.use_api_key(master_key);
|
||||
@ -159,10 +163,18 @@ impl Server<Owned> {
|
||||
self.service.get("/tasks").await
|
||||
}
|
||||
|
||||
pub async fn batches(&self) -> (Value, StatusCode) {
|
||||
self.service.get("/batches").await
|
||||
}
|
||||
|
||||
pub async fn set_features(&self, value: Value) -> (Value, StatusCode) {
|
||||
self.service.patch("/experimental-features", value).await
|
||||
}
|
||||
|
||||
pub async fn set_network(&self, value: Value) -> (Value, StatusCode) {
|
||||
self.service.patch("/network", value).await
|
||||
}
|
||||
|
||||
pub async fn get_metrics(&self) -> (Value, StatusCode) {
|
||||
self.service.get("/metrics").await
|
||||
}
|
||||
@ -408,6 +420,10 @@ impl<State> Server<State> {
|
||||
pub async fn get_features(&self) -> (Value, StatusCode) {
|
||||
self.service.get("/experimental-features").await
|
||||
}
|
||||
|
||||
pub async fn get_network(&self) -> (Value, StatusCode) {
|
||||
self.service.get("/network").await
|
||||
}
|
||||
}
|
||||
|
||||
pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
|
||||
|
@ -1803,6 +1803,275 @@ async fn add_documents_with_geo_field() {
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
|
||||
@r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": "1"
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"_geo": null
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"_geo": {
|
||||
"lat": 1,
|
||||
"lng": 1
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": {
|
||||
"lat": "1",
|
||||
"lng": "1"
|
||||
}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 4
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
// we are expecting docs 4 and 3 first as they have geo
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
|
||||
@r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": {
|
||||
"lat": "1",
|
||||
"lng": "1"
|
||||
},
|
||||
"_geoDistance": 5522018
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"_geo": {
|
||||
"lat": 1,
|
||||
"lng": 1
|
||||
},
|
||||
"_geoDistance": 5522018
|
||||
},
|
||||
{
|
||||
"id": "1"
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"_geo": null
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"processingTimeMs": "[time]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 4
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn update_documents_with_geo_field() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
index.update_settings(json!({"sortableAttributes": ["_geo"]})).await;
|
||||
|
||||
let documents = json!([
|
||||
{
|
||||
"id": "1",
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"_geo": null,
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"_geo": { "lat": 1, "lng": 1 },
|
||||
},
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": { "lat": "1", "lng": "1" },
|
||||
},
|
||||
]);
|
||||
|
||||
let (task, _status_code) = index.add_documents(documents, None).await;
|
||||
let response = index.wait_task(task.uid()).await;
|
||||
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
|
||||
@r###"
|
||||
{
|
||||
"uid": 1,
|
||||
"batchUid": 1,
|
||||
"indexUid": "doggo",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 4,
|
||||
"indexedDocuments": 4
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
// we are expecting docs 4 and 3 first as they have geo
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
|
||||
@r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": {
|
||||
"lat": "1",
|
||||
"lng": "1"
|
||||
},
|
||||
"_geoDistance": 5522018
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"_geo": {
|
||||
"lat": 1,
|
||||
"lng": 1
|
||||
},
|
||||
"_geoDistance": 5522018
|
||||
},
|
||||
{
|
||||
"id": "1"
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"_geo": null
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"processingTimeMs": "[time]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 4
|
||||
}
|
||||
"###);
|
||||
|
||||
let updated_documents = json!([{
|
||||
"id": "3",
|
||||
"doggo": "kefir",
|
||||
}]);
|
||||
let (task, _status_code) = index.update_documents(updated_documents, None).await;
|
||||
let response = index.wait_task(task.uid()).await;
|
||||
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
|
||||
@r###"
|
||||
{
|
||||
"uid": 2,
|
||||
"batchUid": 2,
|
||||
"indexUid": "doggo",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
|
||||
@r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": "1"
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"_geo": null
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"_geo": {
|
||||
"lat": 1,
|
||||
"lng": 1
|
||||
},
|
||||
"doggo": "kefir"
|
||||
},
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": {
|
||||
"lat": "1",
|
||||
"lng": "1"
|
||||
}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 4
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
// the search response should not have changed: we are expecting docs 4 and 3 first as they have geo
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
|
||||
@r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": {
|
||||
"lat": "1",
|
||||
"lng": "1"
|
||||
},
|
||||
"_geoDistance": 5522018
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"_geo": {
|
||||
"lat": 1,
|
||||
"lng": 1
|
||||
},
|
||||
"doggo": "kefir",
|
||||
"_geoDistance": 5522018
|
||||
},
|
||||
{
|
||||
"id": "1"
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"_geo": null
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"processingTimeMs": "[time]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 4
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
@ -161,6 +161,8 @@ async fn delete_document_by_filter() {
|
||||
{
|
||||
"numberOfDocuments": 4,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"color": 3,
|
||||
"id": 4
|
||||
@ -208,6 +210,8 @@ async fn delete_document_by_filter() {
|
||||
{
|
||||
"numberOfDocuments": 2,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"color": 1,
|
||||
"id": 2
|
||||
@ -274,6 +278,8 @@ async fn delete_document_by_filter() {
|
||||
{
|
||||
"numberOfDocuments": 1,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"color": 1,
|
||||
"id": 1
|
||||
|
@ -22,6 +22,7 @@ pub enum GetDump {
|
||||
TestV5,
|
||||
|
||||
TestV6WithExperimental,
|
||||
TestV6WithBatchesAndEnqueuedTasks,
|
||||
}
|
||||
|
||||
impl GetDump {
|
||||
@ -74,6 +75,10 @@ impl GetDump {
|
||||
"tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump"
|
||||
)
|
||||
.into(),
|
||||
GetDump::TestV6WithBatchesAndEnqueuedTasks => {
|
||||
exist_relative_path!("tests/assets/v6_v1.13.0_batches_and_enqueued_tasks.dump")
|
||||
.into()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -27,9 +27,24 @@ async fn import_dump_v1_movie_raw() {
|
||||
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
assert_eq!(
|
||||
stats,
|
||||
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"genres": 53,
|
||||
"id": 53,
|
||||
"overview": 53,
|
||||
"poster": 53,
|
||||
"release_date": 53,
|
||||
"title": 53
|
||||
}
|
||||
}
|
||||
"###
|
||||
);
|
||||
|
||||
let (settings, code) = index.settings().await;
|
||||
@ -173,6 +188,8 @@ async fn import_dump_v1_movie_with_settings() {
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"genres": 53,
|
||||
"id": 53,
|
||||
@ -333,9 +350,24 @@ async fn import_dump_v1_rubygems_with_settings() {
|
||||
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
assert_eq!(
|
||||
stats,
|
||||
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"description": 53,
|
||||
"id": 53,
|
||||
"name": 53,
|
||||
"summary": 53,
|
||||
"total_downloads": 53,
|
||||
"version": 53
|
||||
}
|
||||
}
|
||||
"###
|
||||
);
|
||||
|
||||
let (settings, code) = index.settings().await;
|
||||
@ -483,9 +515,24 @@ async fn import_dump_v2_movie_raw() {
|
||||
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
assert_eq!(
|
||||
stats,
|
||||
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"genres": 53,
|
||||
"id": 53,
|
||||
"overview": 53,
|
||||
"poster": 53,
|
||||
"release_date": 53,
|
||||
"title": 53
|
||||
}
|
||||
}
|
||||
"###
|
||||
);
|
||||
|
||||
let (settings, code) = index.settings().await;
|
||||
@ -623,9 +670,24 @@ async fn import_dump_v2_movie_with_settings() {
|
||||
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
assert_eq!(
|
||||
stats,
|
||||
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"genres": 53,
|
||||
"id": 53,
|
||||
"overview": 53,
|
||||
"poster": 53,
|
||||
"release_date": 53,
|
||||
"title": 53
|
||||
}
|
||||
}
|
||||
"###
|
||||
);
|
||||
|
||||
let (settings, code) = index.settings().await;
|
||||
@ -773,9 +835,24 @@ async fn import_dump_v2_rubygems_with_settings() {
|
||||
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
assert_eq!(
|
||||
stats,
|
||||
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"description": 53,
|
||||
"id": 53,
|
||||
"name": 53,
|
||||
"summary": 53,
|
||||
"total_downloads": 53,
|
||||
"version": 53
|
||||
}
|
||||
}
|
||||
"###
|
||||
);
|
||||
|
||||
let (settings, code) = index.settings().await;
|
||||
@ -920,9 +997,24 @@ async fn import_dump_v3_movie_raw() {
|
||||
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
assert_eq!(
|
||||
stats,
|
||||
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"genres": 53,
|
||||
"id": 53,
|
||||
"overview": 53,
|
||||
"poster": 53,
|
||||
"release_date": 53,
|
||||
"title": 53
|
||||
}
|
||||
}
|
||||
"###
|
||||
);
|
||||
|
||||
let (settings, code) = index.settings().await;
|
||||
@ -1060,9 +1152,24 @@ async fn import_dump_v3_movie_with_settings() {
|
||||
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
assert_eq!(
|
||||
stats,
|
||||
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"genres": 53,
|
||||
"id": 53,
|
||||
"overview": 53,
|
||||
"poster": 53,
|
||||
"release_date": 53,
|
||||
"title": 53
|
||||
}
|
||||
}
|
||||
"###
|
||||
);
|
||||
|
||||
let (settings, code) = index.settings().await;
|
||||
@ -1210,9 +1317,24 @@ async fn import_dump_v3_rubygems_with_settings() {
|
||||
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
assert_eq!(
|
||||
stats,
|
||||
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"description": 53,
|
||||
"id": 53,
|
||||
"name": 53,
|
||||
"summary": 53,
|
||||
"total_downloads": 53,
|
||||
"version": 53
|
||||
}
|
||||
}
|
||||
"###
|
||||
);
|
||||
|
||||
let (settings, code) = index.settings().await;
|
||||
@ -1357,9 +1479,24 @@ async fn import_dump_v4_movie_raw() {
|
||||
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
assert_eq!(
|
||||
stats,
|
||||
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"genres": 53,
|
||||
"id": 53,
|
||||
"overview": 53,
|
||||
"poster": 53,
|
||||
"release_date": 53,
|
||||
"title": 53
|
||||
}
|
||||
}
|
||||
"###
|
||||
);
|
||||
|
||||
let (settings, code) = index.settings().await;
|
||||
@ -1497,9 +1634,24 @@ async fn import_dump_v4_movie_with_settings() {
|
||||
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
assert_eq!(
|
||||
stats,
|
||||
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"genres": 53,
|
||||
"id": 53,
|
||||
"overview": 53,
|
||||
"poster": 53,
|
||||
"release_date": 53,
|
||||
"title": 53
|
||||
}
|
||||
}
|
||||
"###
|
||||
);
|
||||
|
||||
let (settings, code) = index.settings().await;
|
||||
@ -1647,9 +1799,24 @@ async fn import_dump_v4_rubygems_with_settings() {
|
||||
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
assert_eq!(
|
||||
stats,
|
||||
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"description": 53,
|
||||
"id": 53,
|
||||
"name": 53,
|
||||
"summary": 53,
|
||||
"total_downloads": 53,
|
||||
"version": 53
|
||||
}
|
||||
}
|
||||
"###
|
||||
);
|
||||
|
||||
let (settings, code) = index.settings().await;
|
||||
@ -1798,33 +1965,35 @@ async fn import_dump_v5() {
|
||||
server.wait_task(task["uid"].as_u64().unwrap()).await;
|
||||
}
|
||||
|
||||
let expected_stats = json!({
|
||||
"numberOfDocuments": 10,
|
||||
"isIndexing": false,
|
||||
"fieldDistribution": {
|
||||
"cast": 10,
|
||||
"director": 10,
|
||||
"genres": 10,
|
||||
"id": 10,
|
||||
"overview": 10,
|
||||
"popularity": 10,
|
||||
"poster_path": 10,
|
||||
"producer": 10,
|
||||
"production_companies": 10,
|
||||
"release_date": 10,
|
||||
"tagline": 10,
|
||||
"title": 10,
|
||||
"vote_average": 10,
|
||||
"vote_count": 10
|
||||
}
|
||||
});
|
||||
|
||||
let index1 = server.index("test");
|
||||
let index2 = server.index("test2");
|
||||
|
||||
let (stats, code) = index1.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
assert_eq!(stats, expected_stats);
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
{
|
||||
"numberOfDocuments": 10,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"cast": 10,
|
||||
"director": 10,
|
||||
"genres": 10,
|
||||
"id": 10,
|
||||
"overview": 10,
|
||||
"popularity": 10,
|
||||
"poster_path": 10,
|
||||
"producer": 10,
|
||||
"production_companies": 10,
|
||||
"release_date": 10,
|
||||
"tagline": 10,
|
||||
"title": 10,
|
||||
"vote_average": 10,
|
||||
"vote_count": 10
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
let (docs, code) = index2.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@ -1835,7 +2004,32 @@ async fn import_dump_v5() {
|
||||
|
||||
let (stats, code) = index2.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
assert_eq!(stats, expected_stats);
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 10,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"cast": 10,
|
||||
"director": 10,
|
||||
"genres": 10,
|
||||
"id": 10,
|
||||
"overview": 10,
|
||||
"popularity": 10,
|
||||
"poster_path": 10,
|
||||
"producer": 10,
|
||||
"production_companies": 10,
|
||||
"release_date": 10,
|
||||
"tagline": 10,
|
||||
"title": 10,
|
||||
"vote_average": 10,
|
||||
"vote_count": 10
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
let (keys, code) = server.list_api_keys("").await;
|
||||
snapshot!(code, @"200 OK");
|
||||
@ -1908,7 +2102,9 @@ async fn import_dump_v6_containing_experimental_features() {
|
||||
"metrics": false,
|
||||
"logsRoute": false,
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -1992,6 +2188,63 @@ async fn import_dump_v6_containing_experimental_features() {
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn import_dump_v6_containing_batches_and_enqueued_tasks() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
|
||||
let options = Opt {
|
||||
import_dump: Some(GetDump::TestV6WithBatchesAndEnqueuedTasks.path()),
|
||||
..default_settings(temp.path())
|
||||
};
|
||||
let mut server = Server::new_auth_with_options(options, temp).await;
|
||||
server.use_api_key("MASTER_KEY");
|
||||
server.wait_task(2).await.succeeded();
|
||||
let (tasks, _) = server.tasks().await;
|
||||
snapshot!(json_string!(tasks, { ".results[1].startedAt" => "[date]", ".results[1].finishedAt" => "[date]", ".results[1].duration" => "[date]" }), name: "tasks");
|
||||
let (batches, _) = server.batches().await;
|
||||
snapshot!(json_string!(batches, { ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].duration" => "[date]" }), name: "batches");
|
||||
|
||||
let (indexes, code) = server.list_indexes(None, None).await;
|
||||
assert_eq!(code, 200, "{indexes}");
|
||||
|
||||
assert_eq!(indexes["results"].as_array().unwrap().len(), 1);
|
||||
assert_eq!(indexes["results"][0]["uid"], json!("kefir"));
|
||||
assert_eq!(indexes["results"][0]["primaryKey"], json!("id"));
|
||||
|
||||
let (response, code) = server.get_features().await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"metrics": false,
|
||||
"logsRoute": false,
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let index = server.index("kefir");
|
||||
let (documents, _) = index.get_all_documents_raw("").await;
|
||||
snapshot!(documents, @r#"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 1,
|
||||
"dog": "kefir"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"dog": "intel"
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 2
|
||||
}
|
||||
"#);
|
||||
}
|
||||
|
||||
// In this test we must generate the dump ourselves to ensure the
|
||||
// `user provided` vectors are well set
|
||||
#[actix_rt::test]
|
||||
@ -2069,7 +2322,9 @@ async fn generate_and_import_dump_containing_vectors() {
|
||||
"metrics": false,
|
||||
"logsRoute": false,
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
}
|
||||
"###);
|
||||
|
||||
|
@ -0,0 +1,78 @@
|
||||
---
|
||||
source: crates/meilisearch/tests/dumps/mod.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"uid": 2,
|
||||
"progress": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
"status": {
|
||||
"succeeded": 1
|
||||
},
|
||||
"types": {
|
||||
"documentAdditionOrUpdate": 1
|
||||
},
|
||||
"indexUids": {
|
||||
"kefir": 1
|
||||
}
|
||||
},
|
||||
"duration": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
},
|
||||
{
|
||||
"uid": 1,
|
||||
"progress": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
"status": {
|
||||
"succeeded": 1
|
||||
},
|
||||
"types": {
|
||||
"documentAdditionOrUpdate": 1
|
||||
},
|
||||
"indexUids": {
|
||||
"kefir": 1
|
||||
}
|
||||
},
|
||||
"duration": "PT0.144827890S",
|
||||
"startedAt": "2025-02-04T10:15:21.275640274Z",
|
||||
"finishedAt": "2025-02-04T10:15:21.420468164Z"
|
||||
},
|
||||
{
|
||||
"uid": 0,
|
||||
"progress": null,
|
||||
"details": {},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
"status": {
|
||||
"succeeded": 1
|
||||
},
|
||||
"types": {
|
||||
"indexCreation": 1
|
||||
},
|
||||
"indexUids": {
|
||||
"kefir": 1
|
||||
}
|
||||
},
|
||||
"duration": "PT0.032902186S",
|
||||
"startedAt": "2025-02-04T10:14:43.559526162Z",
|
||||
"finishedAt": "2025-02-04T10:14:43.592428348Z"
|
||||
}
|
||||
],
|
||||
"total": 3,
|
||||
"limit": 20,
|
||||
"from": 2,
|
||||
"next": null
|
||||
}
|
@ -0,0 +1,78 @@
|
||||
---
|
||||
source: crates/meilisearch/tests/dumps/mod.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"uid": 3,
|
||||
"batchUid": null,
|
||||
"indexUid": null,
|
||||
"status": "succeeded",
|
||||
"type": "dumpCreation",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"dumpUid": null
|
||||
},
|
||||
"error": null,
|
||||
"duration": "PT0.000629059S",
|
||||
"enqueuedAt": "2025-02-04T10:22:31.318175268Z",
|
||||
"startedAt": "2025-02-04T10:22:31.331701375Z",
|
||||
"finishedAt": "2025-02-04T10:22:31.332330434Z"
|
||||
},
|
||||
{
|
||||
"uid": 2,
|
||||
"batchUid": 2,
|
||||
"indexUid": "kefir",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[date]",
|
||||
"enqueuedAt": "2025-02-04T10:15:49.212484063Z",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
},
|
||||
{
|
||||
"uid": 1,
|
||||
"batchUid": null,
|
||||
"indexUid": "kefir",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "PT0.144827890S",
|
||||
"enqueuedAt": "2025-02-04T10:15:21.258630973Z",
|
||||
"startedAt": "2025-02-04T10:15:21.275640274Z",
|
||||
"finishedAt": "2025-02-04T10:15:21.420468164Z"
|
||||
},
|
||||
{
|
||||
"uid": 0,
|
||||
"batchUid": null,
|
||||
"indexUid": "kefir",
|
||||
"status": "succeeded",
|
||||
"type": "indexCreation",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"primaryKey": null
|
||||
},
|
||||
"error": null,
|
||||
"duration": "PT0.032902186S",
|
||||
"enqueuedAt": "2025-02-04T10:14:43.550379968Z",
|
||||
"startedAt": "2025-02-04T10:14:43.559526162Z",
|
||||
"finishedAt": "2025-02-04T10:14:43.592428348Z"
|
||||
}
|
||||
],
|
||||
"total": 4,
|
||||
"limit": 20,
|
||||
"from": 3,
|
||||
"next": null
|
||||
}
|
@ -21,7 +21,9 @@ async fn experimental_features() {
|
||||
"metrics": false,
|
||||
"logsRoute": false,
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -33,7 +35,9 @@ async fn experimental_features() {
|
||||
"metrics": true,
|
||||
"logsRoute": false,
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -45,7 +49,9 @@ async fn experimental_features() {
|
||||
"metrics": true,
|
||||
"logsRoute": false,
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -58,7 +64,9 @@ async fn experimental_features() {
|
||||
"metrics": true,
|
||||
"logsRoute": false,
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -71,7 +79,9 @@ async fn experimental_features() {
|
||||
"metrics": true,
|
||||
"logsRoute": false,
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
}
|
||||
"###);
|
||||
}
|
||||
@ -91,7 +101,9 @@ async fn experimental_feature_metrics() {
|
||||
"metrics": true,
|
||||
"logsRoute": false,
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -146,7 +158,7 @@ async fn errors() {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`",
|
||||
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
|
@ -7,6 +7,7 @@ mod dumps;
|
||||
mod features;
|
||||
mod index;
|
||||
mod logs;
|
||||
mod network;
|
||||
mod search;
|
||||
mod settings;
|
||||
mod similar;
|
||||
|
606
crates/meilisearch/tests/network/mod.rs
Normal file
606
crates/meilisearch/tests/network/mod.rs
Normal file
@ -0,0 +1,606 @@
|
||||
use serde_json::Value::Null;
|
||||
|
||||
use crate::common::Server;
|
||||
use crate::json;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn error_network_not_enabled() {
|
||||
let server = Server::new().await;
|
||||
|
||||
let (response, code) = server.get_network().await;
|
||||
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Using the /network route requires enabling the `network` experimental feature. See https://github.com/orgs/meilisearch/discussions/805",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = server.set_network(json!({"self": "myself"})).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Using the /network route requires enabling the `network` experimental feature. See https://github.com/orgs/meilisearch/discussions/805",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn errors_on_param() {
|
||||
let server = Server::new().await;
|
||||
|
||||
let (response, code) = server.set_features(json!({"network": true})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["network"]), @r#"true"#);
|
||||
|
||||
// non-existing param
|
||||
let (response, code) = server.set_network(json!({"selfie": "myself"})).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown field `selfie`: expected one of `remotes`, `self`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// self not a string
|
||||
let (response, code) = server.set_network(json!({"self": 42})).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.self`: expected a string, but found a positive integer: `42`",
|
||||
"code": "invalid_network_self",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_network_self"
|
||||
}
|
||||
"###);
|
||||
|
||||
// remotes not an object
|
||||
let (response, code) = server.set_network(json!({"remotes": 42})).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.remotes`: expected an object, but found a positive integer: `42`",
|
||||
"code": "invalid_network_remotes",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_network_remotes"
|
||||
}
|
||||
"###);
|
||||
|
||||
// new remote without url
|
||||
let (response, code) = server
|
||||
.set_network(json!({"remotes": {
|
||||
"new": {
|
||||
"searchApiKey": "http://localhost:7700"
|
||||
}
|
||||
}}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Missing field `.remotes.new.url`",
|
||||
"code": "missing_network_url",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#missing_network_url"
|
||||
}
|
||||
"###);
|
||||
|
||||
// remote with url not a string
|
||||
let (response, code) = server
|
||||
.set_network(json!({"remotes": {
|
||||
"new": {
|
||||
"url": 7700
|
||||
}
|
||||
}}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.remotes.new.url`: expected a string, but found a positive integer: `7700`",
|
||||
"code": "invalid_network_url",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_network_url"
|
||||
}
|
||||
"###);
|
||||
|
||||
// remote with non-existing param
|
||||
let (response, code) = server
|
||||
.set_network(json!({"remotes": {
|
||||
"new": {
|
||||
"url": "http://localhost:7700",
|
||||
"doggo": "Intel the Beagle"
|
||||
}
|
||||
}}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown field `doggo` inside `.remotes.new`: expected one of `url`, `searchApiKey`",
|
||||
"code": "invalid_network_remotes",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_network_remotes"
|
||||
}
|
||||
"###);
|
||||
|
||||
// remote with non-string searchApiKey
|
||||
let (response, code) = server
|
||||
.set_network(json!({"remotes": {
|
||||
"new": {
|
||||
"url": "http://localhost:7700",
|
||||
"searchApiKey": 1204664602099962445u64,
|
||||
}
|
||||
}}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.remotes.new.searchApiKey`: expected a string, but found a positive integer: `1204664602099962445`",
|
||||
"code": "invalid_network_search_api_key",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_network_search_api_key"
|
||||
}
|
||||
"###);
|
||||
|
||||
// setting `null` on URL a posteriori
|
||||
let (response, code) = server
|
||||
.set_network(json!({"remotes": {
|
||||
"kefir": {
|
||||
"url": "http://localhost:7700",
|
||||
}
|
||||
}}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": null,
|
||||
"remotes": {
|
||||
"kefir": {
|
||||
"url": "http://localhost:7700",
|
||||
"searchApiKey": null
|
||||
}
|
||||
}
|
||||
}
|
||||
"###);
|
||||
let (response, code) = server
|
||||
.set_network(json!({"remotes": {
|
||||
"kefir": {
|
||||
"url": Null,
|
||||
}
|
||||
}}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Field `.remotes.kefir.url` cannot be set to `null`",
|
||||
"code": "invalid_network_url",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_network_url"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn auth() {
|
||||
let mut server = Server::new_auth().await;
|
||||
server.use_api_key("MASTER_KEY");
|
||||
|
||||
let (response, code) = server.set_features(json!({"network": true})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["network"]), @r#"true"#);
|
||||
|
||||
let (get_network_key, code) = server
|
||||
.add_api_key(json!({
|
||||
"actions": ["network.get"],
|
||||
"indexes": ["*"],
|
||||
"expiresAt": serde_json::Value::Null
|
||||
}))
|
||||
.await;
|
||||
meili_snap::snapshot!(code, @"201 Created");
|
||||
let get_network_key = get_network_key["key"].clone();
|
||||
|
||||
let (update_network_key, code) = server
|
||||
.add_api_key(json!({
|
||||
"actions": ["network.update"],
|
||||
"indexes": ["*"],
|
||||
"expiresAt": serde_json::Value::Null
|
||||
}))
|
||||
.await;
|
||||
meili_snap::snapshot!(code, @"201 Created");
|
||||
let update_network_key = update_network_key["key"].clone();
|
||||
|
||||
let (search_api_key, code) = server
|
||||
.add_api_key(json!({
|
||||
"actions": ["search"],
|
||||
"indexes": ["*"],
|
||||
"expiresAt": serde_json::Value::Null
|
||||
}))
|
||||
.await;
|
||||
meili_snap::snapshot!(code, @"201 Created");
|
||||
let search_api_key = search_api_key["key"].clone();
|
||||
|
||||
// try with master key
|
||||
let (response, code) = server
|
||||
.set_network(json!({
|
||||
"self": "master"
|
||||
}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "master",
|
||||
"remotes": {}
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = server.get_network().await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "master",
|
||||
"remotes": {}
|
||||
}
|
||||
"###);
|
||||
|
||||
// try get with get permission
|
||||
server.use_api_key(get_network_key.as_str().unwrap());
|
||||
let (response, code) = server.get_network().await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "master",
|
||||
"remotes": {}
|
||||
}
|
||||
"###);
|
||||
|
||||
// try update with update permission
|
||||
server.use_api_key(update_network_key.as_str().unwrap());
|
||||
|
||||
let (response, code) = server
|
||||
.set_network(json!({
|
||||
"self": "api_key"
|
||||
}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "api_key",
|
||||
"remotes": {}
|
||||
}
|
||||
"###);
|
||||
|
||||
// try with the other's permission
|
||||
let (response, code) = server.get_network().await;
|
||||
|
||||
meili_snap::snapshot!(code, @"403 Forbidden");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "The provided API key is invalid.",
|
||||
"code": "invalid_api_key",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
|
||||
}
|
||||
"###);
|
||||
|
||||
server.use_api_key(get_network_key.as_str().unwrap());
|
||||
let (response, code) = server
|
||||
.set_network(json!({
|
||||
"self": "get_api_key"
|
||||
}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"403 Forbidden");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "The provided API key is invalid.",
|
||||
"code": "invalid_api_key",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
|
||||
}
|
||||
"###);
|
||||
// try either with bad permission
|
||||
server.use_api_key(search_api_key.as_str().unwrap());
|
||||
let (response, code) = server.get_network().await;
|
||||
|
||||
meili_snap::snapshot!(code, @"403 Forbidden");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "The provided API key is invalid.",
|
||||
"code": "invalid_api_key",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = server
|
||||
.set_network(json!({
|
||||
"self": "get_api_key"
|
||||
}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"403 Forbidden");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "The provided API key is invalid.",
|
||||
"code": "invalid_api_key",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_and_set_network() {
|
||||
let server = Server::new().await;
|
||||
|
||||
let (response, code) = server.set_features(json!({"network": true})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["network"]), @r#"true"#);
|
||||
|
||||
let (response, code) = server.get_network().await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": null,
|
||||
"remotes": {}
|
||||
}
|
||||
"###);
|
||||
|
||||
// adding self
|
||||
let (response, code) = server.set_network(json!({"self": "myself"})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "myself",
|
||||
"remotes": {}
|
||||
}
|
||||
"###);
|
||||
|
||||
// adding remotes
|
||||
let (response, code) = server
|
||||
.set_network(json!({"remotes": {
|
||||
"myself": {
|
||||
"url": "http://localhost:7700"
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "foo"
|
||||
}
|
||||
}}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "myself",
|
||||
"remotes": {
|
||||
"myself": {
|
||||
"url": "http://localhost:7700",
|
||||
"searchApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "foo"
|
||||
}
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// partially updating one remote
|
||||
let (response, code) = server
|
||||
.set_network(json!({"remotes": {
|
||||
"thy": {
|
||||
"searchApiKey": "bar"
|
||||
}
|
||||
}}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "myself",
|
||||
"remotes": {
|
||||
"myself": {
|
||||
"url": "http://localhost:7700",
|
||||
"searchApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
}
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// adding one remote
|
||||
let (response, code) = server
|
||||
.set_network(json!({"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
}
|
||||
}}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "myself",
|
||||
"remotes": {
|
||||
"myself": {
|
||||
"url": "http://localhost:7700",
|
||||
"searchApiKey": null
|
||||
},
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
}
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// deleting one remote
|
||||
let (response, code) = server
|
||||
.set_network(json!({"remotes": {
|
||||
"myself": Null,
|
||||
}}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "myself",
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
}
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// removing self
|
||||
let (response, code) = server.set_network(json!({"self": Null})).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": null,
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
}
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// setting self again
|
||||
let (response, code) = server.set_network(json!({"self": "thy"})).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
}
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// doing nothing
|
||||
let (response, code) = server.set_network(json!({})).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
}
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// still doing nothing
|
||||
let (response, code) = server.set_network(json!({"remotes": {}})).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
}
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// good time to check GET
|
||||
let (response, code) = server.get_network().await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
}
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// deleting everything
|
||||
let (response, code) = server
|
||||
.set_network(json!({
|
||||
"remotes": Null,
|
||||
}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {}
|
||||
}
|
||||
"###);
|
||||
}
|
@ -5,6 +5,8 @@ use crate::common::Server;
|
||||
use crate::json;
|
||||
use crate::search::{SCORE_DOCUMENTS, VECTOR_DOCUMENTS};
|
||||
|
||||
mod proxy;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_empty_list() {
|
||||
let server = Server::new().await;
|
2591
crates/meilisearch/tests/search/multi/proxy.rs
Normal file
2591
crates/meilisearch/tests/search/multi/proxy.rs
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,4 @@
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
@ -74,3 +75,253 @@ async fn stats() {
|
||||
assert_eq!(response["indexes"]["test"]["fieldDistribution"]["name"], 1);
|
||||
assert_eq!(response["indexes"]["test"]["fieldDistribution"]["age"], 1);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn add_remove_embeddings() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
},
|
||||
"handcrafted": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
},
|
||||
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
// 2 embedded documents for 5 embeddings in total
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
|
||||
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] }},
|
||||
]);
|
||||
|
||||
let (response, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (stats, _code) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
{
|
||||
"numberOfDocuments": 2,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 5,
|
||||
"numberOfEmbeddedDocuments": 2,
|
||||
"fieldDistribution": {
|
||||
"id": 2,
|
||||
"name": 2
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// 2 embedded documents for 3 embeddings in total
|
||||
let documents = json!([
|
||||
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": null }},
|
||||
]);
|
||||
|
||||
let (response, code) = index.update_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (stats, _code) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
{
|
||||
"numberOfDocuments": 2,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 3,
|
||||
"numberOfEmbeddedDocuments": 2,
|
||||
"fieldDistribution": {
|
||||
"id": 2,
|
||||
"name": 2
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// 2 embedded documents for 2 embeddings in total
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": null, "handcrafted": [0, 0, 0] }},
|
||||
]);
|
||||
|
||||
let (response, code) = index.update_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (stats, _code) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
{
|
||||
"numberOfDocuments": 2,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 2,
|
||||
"numberOfEmbeddedDocuments": 2,
|
||||
"fieldDistribution": {
|
||||
"id": 2,
|
||||
"name": 2
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// 1 embedded documents for 2 embeddings in total
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
|
||||
{"id": 1, "name": "echo", "_vectors": { "manual": null, "handcrafted": null }},
|
||||
]);
|
||||
|
||||
let (response, code) = index.update_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (stats, _code) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
{
|
||||
"numberOfDocuments": 2,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 2,
|
||||
"numberOfEmbeddedDocuments": 1,
|
||||
"fieldDistribution": {
|
||||
"id": 2,
|
||||
"name": 2
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn add_remove_embedded_documents() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
},
|
||||
"handcrafted": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
},
|
||||
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
// 2 embedded documents for 5 embeddings in total
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
|
||||
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] }},
|
||||
]);
|
||||
|
||||
let (response, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (stats, _code) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
{
|
||||
"numberOfDocuments": 2,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 5,
|
||||
"numberOfEmbeddedDocuments": 2,
|
||||
"fieldDistribution": {
|
||||
"id": 2,
|
||||
"name": 2
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// delete one embedded document, remaining 1 embedded documents for 3 embeddings in total
|
||||
let (response, code) = index.delete_document(0).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (stats, _code) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
{
|
||||
"numberOfDocuments": 1,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 3,
|
||||
"numberOfEmbeddedDocuments": 1,
|
||||
"fieldDistribution": {
|
||||
"id": 1,
|
||||
"name": 1
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn update_embedder_settings() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
|
||||
// 2 embedded documents for 3 embeddings in total
|
||||
// but no embedders are added in the settings yet so we expect 0 embedded documents for 0 embeddings in total
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
|
||||
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": null }},
|
||||
]);
|
||||
|
||||
let (response, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (stats, _code) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
{
|
||||
"numberOfDocuments": 2,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"id": 2,
|
||||
"name": 2
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// add embedders to the settings
|
||||
// 2 embedded documents for 3 embeddings in total
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
},
|
||||
"handcrafted": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
},
|
||||
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (stats, _code) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
{
|
||||
"numberOfDocuments": 2,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 3,
|
||||
"numberOfEmbeddedDocuments": 2,
|
||||
"fieldDistribution": {
|
||||
"id": 2,
|
||||
"name": 2
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
@ -126,14 +126,17 @@ async fn check_the_index_scheduler(server: &Server) {
|
||||
"#);
|
||||
// And their metadata are still right
|
||||
let (stats, _) = server.stats().await;
|
||||
snapshot!(stats, @r#"
|
||||
snapshot!(stats, @r###"
|
||||
{
|
||||
"databaseSize": 438272,
|
||||
"usedDatabaseSize": 196608,
|
||||
"lastUpdate": "2025-01-23T11:36:22.634859166Z",
|
||||
"indexes": {
|
||||
"kefir": {
|
||||
"numberOfDocuments": 1,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"age": 1,
|
||||
"description": 1,
|
||||
@ -144,7 +147,7 @@ async fn check_the_index_scheduler(server: &Server) {
|
||||
}
|
||||
}
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
|
||||
// Wait until the upgrade has been applied to all indexes to avoid flakyness
|
||||
let (tasks, _) = server.tasks_filter("types=upgradeDatabase&limit=1").await;
|
||||
@ -205,14 +208,17 @@ async fn check_the_index_scheduler(server: &Server) {
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
|
||||
|
||||
let (stats, _) = server.stats().await;
|
||||
snapshot!(stats, @r#"
|
||||
snapshot!(stats, @r###"
|
||||
{
|
||||
"databaseSize": 438272,
|
||||
"usedDatabaseSize": 196608,
|
||||
"lastUpdate": "2025-01-23T11:36:22.634859166Z",
|
||||
"indexes": {
|
||||
"kefir": {
|
||||
"numberOfDocuments": 1,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"age": 1,
|
||||
"description": 1,
|
||||
@ -223,13 +229,15 @@ async fn check_the_index_scheduler(server: &Server) {
|
||||
}
|
||||
}
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
let index = server.index("kefir");
|
||||
let (stats, _) = index.stats().await;
|
||||
snapshot!(stats, @r#"
|
||||
snapshot!(stats, @r###"
|
||||
{
|
||||
"numberOfDocuments": 1,
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
"fieldDistribution": {
|
||||
"age": 1,
|
||||
"description": 1,
|
||||
@ -238,7 +246,7 @@ async fn check_the_index_scheduler(server: &Server) {
|
||||
"surname": 1
|
||||
}
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
|
||||
// Delete all the tasks of a specific batch
|
||||
let (task, _) = server.delete_tasks("batchUids=10").await;
|
||||
|
@ -32,7 +32,7 @@ async fn field_unavailable_for_source() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "`.embedders.default`: Field `revision` unavailable for source `openAi` (only available for sources: `huggingFace`). Available fields: `source`, `model`, `apiKey`, `documentTemplate`, `dimensions`, `distribution`, `url`, `binaryQuantized`",
|
||||
"message": "`.embedders.default`: Field `revision` unavailable for source `openAi` (only available for sources: `huggingFace`). Available fields: `source`, `model`, `apiKey`, `documentTemplate`, `documentTemplateMaxBytes`, `dimensions`, `distribution`, `url`, `binaryQuantized`",
|
||||
"code": "invalid_settings_embedders",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
|
||||
|
@ -1,19 +1,26 @@
|
||||
use std::fs::{read_dir, read_to_string, remove_file, File};
|
||||
use std::io::BufWriter;
|
||||
use std::io::{BufWriter, Write as _};
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::Context;
|
||||
use clap::{Parser, Subcommand};
|
||||
use anyhow::{bail, Context};
|
||||
use clap::{Parser, Subcommand, ValueEnum};
|
||||
use dump::{DumpWriter, IndexMetadata};
|
||||
use file_store::FileStore;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::heed::types::{Bytes, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{
|
||||
CompactionOption, Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified,
|
||||
};
|
||||
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
|
||||
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||
use meilisearch_types::milli::{obkv_to_json, BEU32};
|
||||
use meilisearch_types::tasks::{Status, Task};
|
||||
use meilisearch_types::versioning::{get_version, parse_version};
|
||||
use meilisearch_types::Index;
|
||||
use serde_json::Value::Object;
|
||||
use time::macros::format_description;
|
||||
use time::OffsetDateTime;
|
||||
use upgrade::OfflineUpgrade;
|
||||
@ -65,6 +72,24 @@ enum Command {
|
||||
skip_enqueued_tasks: bool,
|
||||
},
|
||||
|
||||
/// Exports the documents of an index in NDJSON format from a Meilisearch index to stdout.
|
||||
///
|
||||
/// This command can be executed on a running Meilisearch database. However, please note that
|
||||
/// it will maintain a read-only transaction for the duration of the extraction process.
|
||||
ExportDocuments {
|
||||
/// The index name to export the documents from.
|
||||
#[arg(long)]
|
||||
index_name: String,
|
||||
|
||||
/// Do not export vectors with the documents.
|
||||
#[arg(long)]
|
||||
ignore_vectors: bool,
|
||||
|
||||
/// The number of documents to skip.
|
||||
#[arg(long)]
|
||||
offset: Option<usize>,
|
||||
},
|
||||
|
||||
/// Attempts to upgrade from one major version to the next without a dump.
|
||||
///
|
||||
/// Make sure to run this commmand when Meilisearch is not running!
|
||||
@ -78,6 +103,46 @@ enum Command {
|
||||
#[arg(long)]
|
||||
target_version: String,
|
||||
},
|
||||
|
||||
/// Compact the index by using LMDB.
|
||||
///
|
||||
/// You must run this command while Meilisearch is off. The reason is that Meilisearch keep the
|
||||
/// indexes opened and this compaction operation writes into another file. Meilisearch will not
|
||||
/// switch to the new file.
|
||||
///
|
||||
/// **Another possibility** is to keep Meilisearch running to serve search requests, run the
|
||||
/// compaction and once done, close and immediately reopen Meilisearch. This way Meilisearch
|
||||
/// will reopened the data.mdb file when rebooting and see the newly compacted file, ignoring
|
||||
/// the previous non-compacted data.
|
||||
///
|
||||
/// Note that the compaction will open the index, copy and compact the index into another file
|
||||
/// **on the same disk as the index** and replace the previous index with the newly compacted
|
||||
/// one. This means that the disk must have enough room for at most two times the index size.
|
||||
///
|
||||
/// To make sure not to lose any data, this tool takes a mutable transaction on the index
|
||||
/// before running the copy and compaction. This way the current indexation must finish before
|
||||
/// the compaction operation can start. Once the compaction is done, the big index is replaced
|
||||
/// by the compacted one and the mutable transaction is released.
|
||||
CompactIndex { index_name: String },
|
||||
|
||||
/// Uses the hair dryer the dedicate pages hot in cache
|
||||
///
|
||||
/// To make the index faster we must make sure it is hot in the DB cache that's the cure of
|
||||
/// memory-mapping but also it's strengh. This command is designed to make a spcific part of
|
||||
/// the index hot in cache.
|
||||
HairDryer {
|
||||
#[arg(long, value_delimiter = ',')]
|
||||
index_name: Vec<String>,
|
||||
|
||||
#[arg(long, value_delimiter = ',')]
|
||||
index_part: Vec<IndexPart>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone, ValueEnum)]
|
||||
enum IndexPart {
|
||||
/// Will make the arroy index hot.
|
||||
Arroy,
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
@ -90,10 +155,17 @@ fn main() -> anyhow::Result<()> {
|
||||
Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
|
||||
export_a_dump(db_path, dump_dir, skip_enqueued_tasks, detected_version)
|
||||
}
|
||||
Command::ExportDocuments { index_name, ignore_vectors, offset } => {
|
||||
export_documents(db_path, index_name, ignore_vectors, offset)
|
||||
}
|
||||
Command::OfflineUpgrade { target_version } => {
|
||||
let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?;
|
||||
OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade()
|
||||
}
|
||||
Command::CompactIndex { index_name } => compact_index(db_path, &index_name),
|
||||
Command::HairDryer { index_name, index_part } => {
|
||||
hair_dryer(db_path, &index_name, &index_part)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -230,70 +302,86 @@ fn export_a_dump(
|
||||
|
||||
eprintln!("Successfully dumped {count} keys!");
|
||||
|
||||
eprintln!("Dumping the queue");
|
||||
let rtxn = env.read_txn()?;
|
||||
let all_tasks: Database<BEU32, SerdeJson<Task>> =
|
||||
try_opening_database(&env, &rtxn, "all-tasks")?;
|
||||
let all_batches: Database<BEU32, SerdeJson<Batch>> =
|
||||
try_opening_database(&env, &rtxn, "all-batches")?;
|
||||
let index_mapping: Database<Str, UuidCodec> =
|
||||
try_opening_database(&env, &rtxn, "index-mapping")?;
|
||||
|
||||
if skip_enqueued_tasks {
|
||||
eprintln!("Skip dumping the enqueued tasks...");
|
||||
} else {
|
||||
let mut dump_tasks = dump.create_tasks_queue()?;
|
||||
let mut count = 0;
|
||||
for ret in all_tasks.iter(&rtxn)? {
|
||||
let (_, t) = ret?;
|
||||
let status = t.status;
|
||||
let content_file = t.content_uuid();
|
||||
eprintln!("Dumping the tasks");
|
||||
let mut dump_tasks = dump.create_tasks_queue()?;
|
||||
let mut count_tasks = 0;
|
||||
let mut count_enqueued_tasks = 0;
|
||||
for ret in all_tasks.iter(&rtxn)? {
|
||||
let (_, t) = ret?;
|
||||
let status = t.status;
|
||||
let content_file = t.content_uuid();
|
||||
|
||||
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
|
||||
if status == Status::Enqueued && skip_enqueued_tasks {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
|
||||
if let Some(content_file_uuid) = content_file {
|
||||
if status == Status::Enqueued {
|
||||
let content_file = file_store.get_update(content_file_uuid)?;
|
||||
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
|
||||
|
||||
if (detected_version.0, detected_version.1, detected_version.2) < (1, 12, 0) {
|
||||
eprintln!("Dumping the enqueued tasks reading them in obkv format...");
|
||||
let reader =
|
||||
DocumentsBatchReader::from_reader(content_file).with_context(|| {
|
||||
format!("While reading content file {:?}", content_file_uuid)
|
||||
})?;
|
||||
let (mut cursor, documents_batch_index) =
|
||||
reader.into_cursor_and_fields_index();
|
||||
while let Some(doc) = cursor.next_document().with_context(|| {
|
||||
format!("While iterating on content file {:?}", content_file_uuid)
|
||||
})? {
|
||||
dump_content_file
|
||||
.push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
|
||||
}
|
||||
} else {
|
||||
eprintln!(
|
||||
"Dumping the enqueued tasks reading them in JSON stream format..."
|
||||
);
|
||||
for document in
|
||||
serde_json::de::Deserializer::from_reader(content_file).into_iter()
|
||||
{
|
||||
let document = document.with_context(|| {
|
||||
format!("While reading content file {:?}", content_file_uuid)
|
||||
})?;
|
||||
dump_content_file.push_document(&document)?;
|
||||
}
|
||||
// 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
|
||||
if let Some(content_file_uuid) = content_file {
|
||||
if status == Status::Enqueued {
|
||||
let content_file = file_store.get_update(content_file_uuid)?;
|
||||
|
||||
if (detected_version.0, detected_version.1, detected_version.2) < (1, 12, 0) {
|
||||
eprintln!("Dumping the enqueued tasks reading them in obkv format...");
|
||||
let reader =
|
||||
DocumentsBatchReader::from_reader(content_file).with_context(|| {
|
||||
format!("While reading content file {:?}", content_file_uuid)
|
||||
})?;
|
||||
let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
|
||||
while let Some(doc) = cursor.next_document().with_context(|| {
|
||||
format!("While iterating on content file {:?}", content_file_uuid)
|
||||
})? {
|
||||
dump_content_file
|
||||
.push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
|
||||
}
|
||||
} else {
|
||||
eprintln!("Dumping the enqueued tasks reading them in JSON stream format...");
|
||||
for document in
|
||||
serde_json::de::Deserializer::from_reader(content_file).into_iter()
|
||||
{
|
||||
let document = document.with_context(|| {
|
||||
format!("While reading content file {:?}", content_file_uuid)
|
||||
})?;
|
||||
dump_content_file.push_document(&document)?;
|
||||
}
|
||||
|
||||
dump_content_file.flush()?;
|
||||
count += 1;
|
||||
}
|
||||
|
||||
dump_content_file.flush()?;
|
||||
count_enqueued_tasks += 1;
|
||||
}
|
||||
}
|
||||
dump_tasks.flush()?;
|
||||
|
||||
eprintln!("Successfully dumped {count} enqueued tasks!");
|
||||
count_tasks += 1;
|
||||
}
|
||||
dump_tasks.flush()?;
|
||||
eprintln!(
|
||||
"Successfully dumped {count_tasks} tasks including {count_enqueued_tasks} enqueued tasks!"
|
||||
);
|
||||
|
||||
// 4. dump the batches
|
||||
eprintln!("Dumping the batches");
|
||||
let mut dump_batches = dump.create_batches_queue()?;
|
||||
let mut count = 0;
|
||||
|
||||
for ret in all_batches.iter(&rtxn)? {
|
||||
let (_, b) = ret?;
|
||||
dump_batches.push_batch(&b)?;
|
||||
count += 1;
|
||||
}
|
||||
dump_batches.flush()?;
|
||||
eprintln!("Successfully dumped {count} batches!");
|
||||
|
||||
// 5. Dump the indexes
|
||||
eprintln!("Dumping the indexes...");
|
||||
|
||||
// 4. Dump the indexes
|
||||
let mut count = 0;
|
||||
for result in index_mapping.iter(&rtxn)? {
|
||||
let (uid, uuid) = result?;
|
||||
@ -314,14 +402,14 @@ fn export_a_dump(
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
// 4.1. Dump the documents
|
||||
// 5.1. Dump the documents
|
||||
for ret in index.all_documents(&rtxn)? {
|
||||
let (_id, doc) = ret?;
|
||||
let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
|
||||
index_dumper.push_document(&document)?;
|
||||
}
|
||||
|
||||
// 4.2. Dump the settings
|
||||
// 5.2. Dump the settings
|
||||
let settings = meilisearch_types::settings::settings(
|
||||
&index,
|
||||
&rtxn,
|
||||
@ -347,3 +435,241 @@ fn export_a_dump(
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
|
||||
let index_scheduler_path = db_path.join("tasks");
|
||||
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
|
||||
let rtxn = env.read_txn()?;
|
||||
let index_mapping: Database<Str, UuidCodec> =
|
||||
try_opening_database(&env, &rtxn, "index-mapping")?;
|
||||
|
||||
for result in index_mapping.iter(&rtxn)? {
|
||||
let (uid, uuid) = result?;
|
||||
|
||||
if uid != index_name {
|
||||
eprintln!("Found index {uid} and skipping it");
|
||||
continue;
|
||||
} else {
|
||||
eprintln!("Found index {uid} 🎉");
|
||||
}
|
||||
|
||||
let index_path = db_path.join("indexes").join(uuid.to_string());
|
||||
let index = Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| {
|
||||
format!("While trying to open the index at path {:?}", index_path.display())
|
||||
})?;
|
||||
|
||||
eprintln!("Awaiting for a mutable transaction...");
|
||||
let _wtxn = index.write_txn().context("While awaiting for a write transaction")?;
|
||||
|
||||
// We create and immediately drop the file because the
|
||||
let non_compacted_index_file_path = index_path.join("data.mdb");
|
||||
let compacted_index_file_path = index_path.join("data.mdb.cpy");
|
||||
|
||||
eprintln!("Compacting the index...");
|
||||
let before_compaction = Instant::now();
|
||||
let new_file = index
|
||||
.copy_to_file(&compacted_index_file_path, CompactionOption::Enabled)
|
||||
.with_context(|| format!("While compacting {}", compacted_index_file_path.display()))?;
|
||||
|
||||
let after_size = new_file.metadata()?.len();
|
||||
let before_size = std::fs::metadata(&non_compacted_index_file_path)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"While retrieving the metadata of {}",
|
||||
non_compacted_index_file_path.display(),
|
||||
)
|
||||
})?
|
||||
.len();
|
||||
|
||||
let reduction = before_size as f64 / after_size as f64;
|
||||
println!("Compaction successful. Took around {:.2?}", before_compaction.elapsed());
|
||||
eprintln!("The index went from {before_size} bytes to {after_size} bytes ({reduction:.2}x reduction)");
|
||||
|
||||
eprintln!("Replacing the non-compacted index by the compacted one...");
|
||||
std::fs::rename(&compacted_index_file_path, &non_compacted_index_file_path).with_context(
|
||||
|| {
|
||||
format!(
|
||||
"While renaming {} into {}",
|
||||
compacted_index_file_path.display(),
|
||||
non_compacted_index_file_path.display(),
|
||||
)
|
||||
},
|
||||
)?;
|
||||
|
||||
drop(new_file);
|
||||
|
||||
println!("Everything's done 🎉");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
bail!("Target index {index_name} not found!")
|
||||
}
|
||||
|
||||
fn export_documents(
|
||||
db_path: PathBuf,
|
||||
index_name: String,
|
||||
ignore_vectors: bool,
|
||||
offset: Option<usize>,
|
||||
) -> anyhow::Result<()> {
|
||||
let index_scheduler_path = db_path.join("tasks");
|
||||
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
|
||||
let rtxn = env.read_txn()?;
|
||||
let index_mapping: Database<Str, UuidCodec> =
|
||||
try_opening_database(&env, &rtxn, "index-mapping")?;
|
||||
|
||||
for result in index_mapping.iter(&rtxn)? {
|
||||
let (uid, uuid) = result?;
|
||||
if uid == index_name {
|
||||
let index_path = db_path.join("indexes").join(uuid.to_string());
|
||||
let index =
|
||||
Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| {
|
||||
format!("While trying to open the index at path {:?}", index_path.display())
|
||||
})?;
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let embedding_configs = index.embedding_configs(&rtxn)?;
|
||||
|
||||
if let Some(offset) = offset {
|
||||
eprintln!("Skipping {offset} documents");
|
||||
}
|
||||
|
||||
let mut stdout = BufWriter::new(std::io::stdout());
|
||||
let all_documents = index.documents_ids(&rtxn)?.into_iter().skip(offset.unwrap_or(0));
|
||||
for (i, ret) in index.iter_documents(&rtxn, all_documents)?.enumerate() {
|
||||
let (id, doc) = ret?;
|
||||
let mut document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
|
||||
|
||||
if i % 10_000 == 0 {
|
||||
eprintln!("Starting the {}th document", i + offset.unwrap_or(0));
|
||||
}
|
||||
|
||||
if !ignore_vectors {
|
||||
'inject_vectors: {
|
||||
let embeddings = index.embeddings(&rtxn, id)?;
|
||||
|
||||
if embeddings.is_empty() {
|
||||
break 'inject_vectors;
|
||||
}
|
||||
|
||||
let vectors = document
|
||||
.entry(RESERVED_VECTORS_FIELD_NAME)
|
||||
.or_insert(Object(Default::default()));
|
||||
|
||||
let Object(vectors) = vectors else {
|
||||
return Err(meilisearch_types::milli::Error::UserError(
|
||||
meilisearch_types::milli::UserError::InvalidVectorsMapType {
|
||||
document_id: {
|
||||
if let Ok(Some(Ok(index))) = index
|
||||
.external_id_of(&rtxn, std::iter::once(id))
|
||||
.map(|it| it.into_iter().next())
|
||||
{
|
||||
index
|
||||
} else {
|
||||
format!("internal docid={id}")
|
||||
}
|
||||
},
|
||||
value: vectors.clone(),
|
||||
},
|
||||
)
|
||||
.into());
|
||||
};
|
||||
|
||||
for (embedder_name, embeddings) in embeddings {
|
||||
let user_provided = embedding_configs
|
||||
.iter()
|
||||
.find(|conf| conf.name == embedder_name)
|
||||
.is_some_and(|conf| conf.user_provided.contains(id));
|
||||
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
|
||||
embeddings,
|
||||
)),
|
||||
regenerate: !user_provided,
|
||||
};
|
||||
vectors
|
||||
.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
serde_json::to_writer(&mut stdout, &document)?;
|
||||
}
|
||||
|
||||
stdout.flush()?;
|
||||
} else {
|
||||
eprintln!("Found index {uid} but it's not the right index...");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn hair_dryer(
|
||||
db_path: PathBuf,
|
||||
index_names: &[String],
|
||||
index_parts: &[IndexPart],
|
||||
) -> anyhow::Result<()> {
|
||||
let index_scheduler_path = db_path.join("tasks");
|
||||
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
|
||||
eprintln!("Trying to get a read transaction on the index scheduler...");
|
||||
|
||||
let rtxn = env.read_txn()?;
|
||||
let index_mapping: Database<Str, UuidCodec> =
|
||||
try_opening_database(&env, &rtxn, "index-mapping")?;
|
||||
|
||||
for result in index_mapping.iter(&rtxn)? {
|
||||
let (uid, uuid) = result?;
|
||||
if index_names.iter().any(|i| i == uid) {
|
||||
let index_path = db_path.join("indexes").join(uuid.to_string());
|
||||
let index =
|
||||
Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| {
|
||||
format!("While trying to open the index at path {:?}", index_path.display())
|
||||
})?;
|
||||
|
||||
eprintln!("Trying to get a read transaction on the {uid} index...");
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
for part in index_parts {
|
||||
match part {
|
||||
IndexPart::Arroy => {
|
||||
let mut count = 0;
|
||||
let total = index.vector_arroy.len(&rtxn)?;
|
||||
eprintln!("Hair drying arroy for {uid}...");
|
||||
for (i, result) in index
|
||||
.vector_arroy
|
||||
.remap_types::<Bytes, Bytes>()
|
||||
.iter(&rtxn)?
|
||||
.enumerate()
|
||||
{
|
||||
let (key, value) = result?;
|
||||
|
||||
// All of this just to avoid compiler optimizations 🤞
|
||||
// We must read all the bytes to make the pages hot in cache.
|
||||
// <https://doc.rust-lang.org/std/hint/fn.black_box.html>
|
||||
count += std::hint::black_box(key.iter().fold(0, |acc, _| acc + 1));
|
||||
count += std::hint::black_box(value.iter().fold(0, |acc, _| acc + 1));
|
||||
|
||||
if i % 10_000 == 0 {
|
||||
let perc = (i as f64) / (total as f64) * 100.0;
|
||||
eprintln!("Visited {i}/{total} ({perc:.2}%) keys")
|
||||
}
|
||||
}
|
||||
eprintln!("Done hair drying a total of at least {count} bytes.");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
eprintln!("Found index {uid} but it's not the right index...");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
<p align="center">
|
||||
<img alt="the milli logo" src="../assets/milli-logo.svg">
|
||||
<img alt="the milli logo" src="../../assets/milli-logo.svg">
|
||||
</p>
|
||||
|
||||
<p align="center">a concurrent indexer combined with fast and relevant search algorithms</p>
|
||||
|
@ -22,7 +22,7 @@ use crate::heed_codec::version::VersionCodec;
|
||||
use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
|
||||
use crate::order_by_map::OrderByMap;
|
||||
use crate::proximity::ProximityPrecision;
|
||||
use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig};
|
||||
use crate::vector::{ArroyStats, ArroyWrapper, Embedding, EmbeddingConfig};
|
||||
use crate::{
|
||||
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
||||
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
|
||||
@ -1731,6 +1731,18 @@ impl Index {
|
||||
let compute_prefixes = self.prefix_search(rtxn)?.unwrap_or_default();
|
||||
Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 })
|
||||
}
|
||||
|
||||
pub fn arroy_stats(&self, rtxn: &RoTxn<'_>) -> Result<ArroyStats> {
|
||||
let mut stats = ArroyStats::default();
|
||||
let embedding_configs = self.embedding_configs(rtxn)?;
|
||||
for config in embedding_configs {
|
||||
let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
|
||||
let reader =
|
||||
ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized());
|
||||
reader.aggregate_stats(rtxn, &mut stats)?;
|
||||
}
|
||||
Ok(stats)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
|
@ -1,7 +1,7 @@
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use itertools::Itertools;
|
||||
use serde::Serialize;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::distance_between_two_points;
|
||||
|
||||
@ -36,6 +36,15 @@ enum RankOrValue<'a> {
|
||||
Score(f64),
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum WeightedScoreValue {
|
||||
WeightedScore(f64),
|
||||
Sort { asc: bool, value: serde_json::Value },
|
||||
GeoSort { asc: bool, distance: Option<f64> },
|
||||
VectorSort(f64),
|
||||
}
|
||||
|
||||
impl ScoreDetails {
|
||||
pub fn local_score(&self) -> Option<f64> {
|
||||
self.rank().map(Rank::local_score)
|
||||
@ -87,6 +96,30 @@ impl ScoreDetails {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn weighted_score_values<'a>(
|
||||
details: impl Iterator<Item = &'a Self> + 'a,
|
||||
weight: f64,
|
||||
) -> impl Iterator<Item = WeightedScoreValue> + 'a {
|
||||
details
|
||||
.map(ScoreDetails::rank_or_value)
|
||||
.coalesce(|left, right| match (left, right) {
|
||||
(RankOrValue::Rank(left), RankOrValue::Rank(right)) => {
|
||||
Ok(RankOrValue::Rank(Rank::merge(left, right)))
|
||||
}
|
||||
(left, right) => Err((left, right)),
|
||||
})
|
||||
.map(move |rank_or_value| match rank_or_value {
|
||||
RankOrValue::Rank(r) => WeightedScoreValue::WeightedScore(r.local_score() * weight),
|
||||
RankOrValue::Sort(s) => {
|
||||
WeightedScoreValue::Sort { asc: s.ascending, value: s.value.clone() }
|
||||
}
|
||||
RankOrValue::GeoSort(g) => {
|
||||
WeightedScoreValue::GeoSort { asc: g.ascending, distance: g.distance() }
|
||||
}
|
||||
RankOrValue::Score(s) => WeightedScoreValue::VectorSort(s * weight),
|
||||
})
|
||||
}
|
||||
|
||||
fn rank_or_value(&self) -> RankOrValue<'_> {
|
||||
match self {
|
||||
ScoreDetails::Words(w) => RankOrValue::Rank(w.rank()),
|
||||
@ -423,34 +456,58 @@ pub struct Sort {
|
||||
pub value: serde_json::Value,
|
||||
}
|
||||
|
||||
pub fn compare_sort_values(
|
||||
ascending: bool,
|
||||
left: &serde_json::Value,
|
||||
right: &serde_json::Value,
|
||||
) -> Ordering {
|
||||
use serde_json::Value::*;
|
||||
match (left, right) {
|
||||
(Null, Null) => Ordering::Equal,
|
||||
(Null, _) => Ordering::Less,
|
||||
(_, Null) => Ordering::Greater,
|
||||
// numbers are always before strings
|
||||
(Number(_), String(_)) => Ordering::Greater,
|
||||
(String(_), Number(_)) => Ordering::Less,
|
||||
(Number(left), Number(right)) => {
|
||||
// FIXME: unwrap permitted here?
|
||||
let order = left
|
||||
.as_f64()
|
||||
.unwrap()
|
||||
.partial_cmp(&right.as_f64().unwrap())
|
||||
.unwrap_or(Ordering::Equal);
|
||||
// 12 < 42, and when ascending, we want to see 12 first, so the smallest.
|
||||
// Hence, when ascending, smaller is better
|
||||
if ascending {
|
||||
order.reverse()
|
||||
} else {
|
||||
order
|
||||
}
|
||||
}
|
||||
(String(left), String(right)) => {
|
||||
let order = left.cmp(right);
|
||||
// Taking e.g. "a" and "z"
|
||||
// "a" < "z", and when ascending, we want to see "a" first, so the smallest.
|
||||
// Hence, when ascending, smaller is better
|
||||
if ascending {
|
||||
order.reverse()
|
||||
} else {
|
||||
order
|
||||
}
|
||||
}
|
||||
(left, right) => {
|
||||
tracing::warn!(%left, %right, "sort values that are neither numbers, strings or null, handling as equal");
|
||||
Ordering::Equal
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for Sort {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
if self.ascending != other.ascending {
|
||||
return None;
|
||||
}
|
||||
match (&self.value, &other.value) {
|
||||
(serde_json::Value::Null, serde_json::Value::Null) => Some(Ordering::Equal),
|
||||
(serde_json::Value::Null, _) => Some(Ordering::Less),
|
||||
(_, serde_json::Value::Null) => Some(Ordering::Greater),
|
||||
// numbers are always before strings
|
||||
(serde_json::Value::Number(_), serde_json::Value::String(_)) => Some(Ordering::Greater),
|
||||
(serde_json::Value::String(_), serde_json::Value::Number(_)) => Some(Ordering::Less),
|
||||
(serde_json::Value::Number(left), serde_json::Value::Number(right)) => {
|
||||
// FIXME: unwrap permitted here?
|
||||
let order = left.as_f64().unwrap().partial_cmp(&right.as_f64().unwrap())?;
|
||||
// 12 < 42, and when ascending, we want to see 12 first, so the smallest.
|
||||
// Hence, when ascending, smaller is better
|
||||
Some(if self.ascending { order.reverse() } else { order })
|
||||
}
|
||||
(serde_json::Value::String(left), serde_json::Value::String(right)) => {
|
||||
let order = left.cmp(right);
|
||||
// Taking e.g. "a" and "z"
|
||||
// "a" < "z", and when ascending, we want to see "a" first, so the smallest.
|
||||
// Hence, when ascending, smaller is better
|
||||
Some(if self.ascending { order.reverse() } else { order })
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
Some(compare_sort_values(self.ascending, &self.value, &other.value))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,7 @@ use either::Either;
|
||||
pub use matching_words::MatchingWords;
|
||||
use matching_words::{MatchType, PartialMatch};
|
||||
use r#match::{Match, MatchPosition};
|
||||
use serde::Serialize;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use simple_token_kind::SimpleTokenKind;
|
||||
use utoipa::ToSchema;
|
||||
|
||||
@ -101,11 +101,11 @@ impl FormatOptions {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug, Clone, PartialEq, Eq, ToSchema)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, ToSchema)]
|
||||
pub struct MatchBounds {
|
||||
pub start: usize,
|
||||
pub length: usize,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
pub indices: Option<Vec<usize>>,
|
||||
}
|
||||
|
||||
|
@ -563,7 +563,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::universe")]
|
||||
#[tracing::instrument(level = "debug", skip_all, target = "search::universe")]
|
||||
pub fn filtered_universe(
|
||||
index: &Index,
|
||||
txn: &RoTxn<'_>,
|
||||
|
@ -1,4 +1,4 @@
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use rayon::{ThreadPool, ThreadPoolBuilder};
|
||||
@ -9,6 +9,8 @@ use thiserror::Error;
|
||||
#[derive(Debug)]
|
||||
pub struct ThreadPoolNoAbort {
|
||||
thread_pool: ThreadPool,
|
||||
/// The number of active operations.
|
||||
active_operations: AtomicUsize,
|
||||
/// Set to true if the thread pool catched a panic.
|
||||
pool_catched_panic: Arc<AtomicBool>,
|
||||
}
|
||||
@ -19,7 +21,9 @@ impl ThreadPoolNoAbort {
|
||||
OP: FnOnce() -> R + Send,
|
||||
R: Send,
|
||||
{
|
||||
self.active_operations.fetch_add(1, Ordering::Relaxed);
|
||||
let output = self.thread_pool.install(op);
|
||||
self.active_operations.fetch_sub(1, Ordering::Relaxed);
|
||||
// While reseting the pool panic catcher we return an error if we catched one.
|
||||
if self.pool_catched_panic.swap(false, Ordering::SeqCst) {
|
||||
Err(PanicCatched)
|
||||
@ -31,6 +35,11 @@ impl ThreadPoolNoAbort {
|
||||
pub fn current_num_threads(&self) -> usize {
|
||||
self.thread_pool.current_num_threads()
|
||||
}
|
||||
|
||||
/// The number of active operations.
|
||||
pub fn active_operations(&self) -> usize {
|
||||
self.active_operations.load(Ordering::Relaxed)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
@ -64,6 +73,10 @@ impl ThreadPoolNoAbortBuilder {
|
||||
let catched_panic = pool_catched_panic.clone();
|
||||
move |_result| catched_panic.store(true, Ordering::SeqCst)
|
||||
});
|
||||
Ok(ThreadPoolNoAbort { thread_pool: self.0.build()?, pool_catched_panic })
|
||||
Ok(ThreadPoolNoAbort {
|
||||
thread_pool: self.0.build()?,
|
||||
active_operations: AtomicUsize::new(0),
|
||||
pool_catched_panic,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -5,6 +5,8 @@ use std::marker::PhantomData;
|
||||
use std::mem;
|
||||
use std::num::NonZeroU16;
|
||||
use std::ops::Range;
|
||||
use std::sync::atomic::{self, AtomicUsize};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use bbqueue::framed::{FrameGrantR, FrameProducer};
|
||||
@ -71,12 +73,23 @@ pub fn extractor_writer_bbqueue(
|
||||
consumer
|
||||
});
|
||||
|
||||
let sent_messages_attempts = Arc::new(AtomicUsize::new(0));
|
||||
let blocking_sent_messages_attempts = Arc::new(AtomicUsize::new(0));
|
||||
|
||||
let (sender, receiver) = flume::bounded(channel_capacity);
|
||||
let sender = ExtractorBbqueueSender { sender, producers, max_grant };
|
||||
let sender = ExtractorBbqueueSender {
|
||||
sender,
|
||||
producers,
|
||||
max_grant,
|
||||
sent_messages_attempts: sent_messages_attempts.clone(),
|
||||
blocking_sent_messages_attempts: blocking_sent_messages_attempts.clone(),
|
||||
};
|
||||
let receiver = WriterBbqueueReceiver {
|
||||
receiver,
|
||||
look_at_consumer: (0..consumers.len()).cycle(),
|
||||
consumers,
|
||||
sent_messages_attempts,
|
||||
blocking_sent_messages_attempts,
|
||||
};
|
||||
(sender, receiver)
|
||||
}
|
||||
@ -92,6 +105,12 @@ pub struct ExtractorBbqueueSender<'a> {
|
||||
/// It will never be able to store more than that as the
|
||||
/// buffer cannot split data into two parts.
|
||||
max_grant: usize,
|
||||
/// The total number of attempts to send messages
|
||||
/// over the bbqueue channel.
|
||||
sent_messages_attempts: Arc<AtomicUsize>,
|
||||
/// The number of times an attempt to send a
|
||||
/// messages failed and we had to pause for a bit.
|
||||
blocking_sent_messages_attempts: Arc<AtomicUsize>,
|
||||
}
|
||||
|
||||
pub struct WriterBbqueueReceiver<'a> {
|
||||
@ -104,6 +123,12 @@ pub struct WriterBbqueueReceiver<'a> {
|
||||
look_at_consumer: Cycle<Range<usize>>,
|
||||
/// The BBQueue frames to read when waking-up.
|
||||
consumers: Vec<bbqueue::framed::FrameConsumer<'a>>,
|
||||
/// The total number of attempts to send messages
|
||||
/// over the bbqueue channel.
|
||||
sent_messages_attempts: Arc<AtomicUsize>,
|
||||
/// The number of times an attempt to send a
|
||||
/// message failed and we had to pause for a bit.
|
||||
blocking_sent_messages_attempts: Arc<AtomicUsize>,
|
||||
}
|
||||
|
||||
/// The action to perform on the receiver/writer side.
|
||||
@ -169,6 +194,16 @@ impl<'a> WriterBbqueueReceiver<'a> {
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns the total count of attempts to send messages through the BBQueue channel.
|
||||
pub fn sent_messages_attempts(&self) -> usize {
|
||||
self.sent_messages_attempts.load(atomic::Ordering::Relaxed)
|
||||
}
|
||||
|
||||
/// Returns the count of attempts to send messages that had to be paused due to BBQueue being full.
|
||||
pub fn blocking_sent_messages_attempts(&self) -> usize {
|
||||
self.blocking_sent_messages_attempts.load(atomic::Ordering::Relaxed)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FrameWithHeader<'a> {
|
||||
@ -458,10 +493,17 @@ impl<'b> ExtractorBbqueueSender<'b> {
|
||||
}
|
||||
|
||||
// Spin loop to have a frame the size we requested.
|
||||
reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| {
|
||||
payload_header.serialize_into(grant);
|
||||
Ok(())
|
||||
})?;
|
||||
reserve_and_write_grant(
|
||||
&mut producer,
|
||||
total_length,
|
||||
&self.sender,
|
||||
&self.sent_messages_attempts,
|
||||
&self.blocking_sent_messages_attempts,
|
||||
|grant| {
|
||||
payload_header.serialize_into(grant);
|
||||
Ok(())
|
||||
},
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -500,20 +542,28 @@ impl<'b> ExtractorBbqueueSender<'b> {
|
||||
}
|
||||
|
||||
// Spin loop to have a frame the size we requested.
|
||||
reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| {
|
||||
let header_size = payload_header.header_size();
|
||||
let (header_bytes, remaining) = grant.split_at_mut(header_size);
|
||||
payload_header.serialize_into(header_bytes);
|
||||
reserve_and_write_grant(
|
||||
&mut producer,
|
||||
total_length,
|
||||
&self.sender,
|
||||
&self.sent_messages_attempts,
|
||||
&self.blocking_sent_messages_attempts,
|
||||
|grant| {
|
||||
let header_size = payload_header.header_size();
|
||||
let (header_bytes, remaining) = grant.split_at_mut(header_size);
|
||||
payload_header.serialize_into(header_bytes);
|
||||
|
||||
if dimensions != 0 {
|
||||
let output_iter = remaining.chunks_exact_mut(dimensions * mem::size_of::<f32>());
|
||||
for (embedding, output) in embeddings.iter().zip(output_iter) {
|
||||
output.copy_from_slice(bytemuck::cast_slice(embedding));
|
||||
if dimensions != 0 {
|
||||
let output_iter =
|
||||
remaining.chunks_exact_mut(dimensions * mem::size_of::<f32>());
|
||||
for (embedding, output) in embeddings.iter().zip(output_iter) {
|
||||
output.copy_from_slice(bytemuck::cast_slice(embedding));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})?;
|
||||
Ok(())
|
||||
},
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -571,13 +621,20 @@ impl<'b> ExtractorBbqueueSender<'b> {
|
||||
}
|
||||
|
||||
// Spin loop to have a frame the size we requested.
|
||||
reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| {
|
||||
let header_size = payload_header.header_size();
|
||||
let (header_bytes, remaining) = grant.split_at_mut(header_size);
|
||||
payload_header.serialize_into(header_bytes);
|
||||
let (key_buffer, value_buffer) = remaining.split_at_mut(key_length.get() as usize);
|
||||
key_value_writer(key_buffer, value_buffer)
|
||||
})?;
|
||||
reserve_and_write_grant(
|
||||
&mut producer,
|
||||
total_length,
|
||||
&self.sender,
|
||||
&self.sent_messages_attempts,
|
||||
&self.blocking_sent_messages_attempts,
|
||||
|grant| {
|
||||
let header_size = payload_header.header_size();
|
||||
let (header_bytes, remaining) = grant.split_at_mut(header_size);
|
||||
payload_header.serialize_into(header_bytes);
|
||||
let (key_buffer, value_buffer) = remaining.split_at_mut(key_length.get() as usize);
|
||||
key_value_writer(key_buffer, value_buffer)
|
||||
},
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -619,12 +676,19 @@ impl<'b> ExtractorBbqueueSender<'b> {
|
||||
}
|
||||
|
||||
// Spin loop to have a frame the size we requested.
|
||||
reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| {
|
||||
let header_size = payload_header.header_size();
|
||||
let (header_bytes, remaining) = grant.split_at_mut(header_size);
|
||||
payload_header.serialize_into(header_bytes);
|
||||
key_writer(remaining)
|
||||
})?;
|
||||
reserve_and_write_grant(
|
||||
&mut producer,
|
||||
total_length,
|
||||
&self.sender,
|
||||
&self.sent_messages_attempts,
|
||||
&self.blocking_sent_messages_attempts,
|
||||
|grant| {
|
||||
let header_size = payload_header.header_size();
|
||||
let (header_bytes, remaining) = grant.split_at_mut(header_size);
|
||||
payload_header.serialize_into(header_bytes);
|
||||
key_writer(remaining)
|
||||
},
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -637,12 +701,18 @@ fn reserve_and_write_grant<F>(
|
||||
producer: &mut FrameProducer,
|
||||
total_length: usize,
|
||||
sender: &flume::Sender<ReceiverAction>,
|
||||
sent_messages_attempts: &AtomicUsize,
|
||||
blocking_sent_messages_attempts: &AtomicUsize,
|
||||
f: F,
|
||||
) -> crate::Result<()>
|
||||
where
|
||||
F: FnOnce(&mut [u8]) -> crate::Result<()>,
|
||||
{
|
||||
loop {
|
||||
// An attempt means trying multiple times
|
||||
// whether is succeeded or not.
|
||||
sent_messages_attempts.fetch_add(1, atomic::Ordering::Relaxed);
|
||||
|
||||
for _ in 0..10_000 {
|
||||
match producer.grant(total_length) {
|
||||
Ok(mut grant) => {
|
||||
@ -666,6 +736,10 @@ where
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
|
||||
// We made an attempt to send a message in the
|
||||
// bbqueue channel but it didn't succeed.
|
||||
blocking_sent_messages_attempts.fetch_add(1, atomic::Ordering::Relaxed);
|
||||
|
||||
// We prefer to yield and allow the writing thread
|
||||
// to do its job, especially beneficial when there
|
||||
// is only one CPU core available.
|
||||
|
@ -144,7 +144,7 @@ impl<'doc> Update<'doc> {
|
||||
)?)
|
||||
}
|
||||
|
||||
pub fn updated(&self) -> DocumentFromVersions<'_, 'doc> {
|
||||
pub fn only_changed_fields(&self) -> DocumentFromVersions<'_, 'doc> {
|
||||
DocumentFromVersions::new(&self.new)
|
||||
}
|
||||
|
||||
@ -182,7 +182,7 @@ impl<'doc> Update<'doc> {
|
||||
let mut cached_current = None;
|
||||
let mut updated_selected_field_count = 0;
|
||||
|
||||
for entry in self.updated().iter_top_level_fields() {
|
||||
for entry in self.only_changed_fields().iter_top_level_fields() {
|
||||
let (key, updated_value) = entry?;
|
||||
|
||||
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
|
||||
@ -241,7 +241,7 @@ impl<'doc> Update<'doc> {
|
||||
Ok(has_deleted_fields)
|
||||
}
|
||||
|
||||
pub fn updated_vectors(
|
||||
pub fn only_changed_vectors(
|
||||
&self,
|
||||
doc_alloc: &'doc Bump,
|
||||
embedders: &'doc EmbeddingConfigs,
|
||||
|
@ -199,7 +199,7 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
|
||||
.transpose()?;
|
||||
|
||||
let updated_geo = update
|
||||
.updated()
|
||||
.merged(rtxn, index, db_fields_ids_map)?
|
||||
.geo_field()?
|
||||
.map(|geo| extract_geo_coordinates(external_id, geo))
|
||||
.transpose()?;
|
||||
|
@ -99,7 +99,8 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
|
||||
context.db_fields_ids_map,
|
||||
&context.doc_alloc,
|
||||
)?;
|
||||
let new_vectors = update.updated_vectors(&context.doc_alloc, self.embedders)?;
|
||||
let new_vectors =
|
||||
update.only_changed_vectors(&context.doc_alloc, self.embedders)?;
|
||||
|
||||
if let Some(new_vectors) = &new_vectors {
|
||||
unused_vectors_distribution.append(new_vectors)?;
|
||||
|
@ -234,7 +234,7 @@ where
|
||||
);
|
||||
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
|
||||
let span = tracing::debug_span!(target: "indexing::documents::extract", "vectors");
|
||||
let _entered = span.enter();
|
||||
|
||||
extract(
|
||||
@ -247,7 +247,7 @@ where
|
||||
)?;
|
||||
}
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors");
|
||||
let span = tracing::debug_span!(target: "indexing::documents::merge", "vectors");
|
||||
let _entered = span.enter();
|
||||
|
||||
for config in &mut index_embeddings {
|
||||
|
@ -1,5 +1,5 @@
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::RwLock;
|
||||
use std::sync::{Once, RwLock};
|
||||
use std::thread::{self, Builder};
|
||||
|
||||
use big_s::S;
|
||||
@ -33,6 +33,8 @@ mod post_processing;
|
||||
mod update_by_function;
|
||||
mod write;
|
||||
|
||||
static LOG_MEMORY_METRICS_ONCE: Once = Once::new();
|
||||
|
||||
/// This is the main function of this crate.
|
||||
///
|
||||
/// Give it the output of the [`Indexer::document_changes`] method and it will execute it in the [`rayon::ThreadPool`].
|
||||
@ -93,6 +95,15 @@ where
|
||||
},
|
||||
);
|
||||
|
||||
LOG_MEMORY_METRICS_ONCE.call_once(|| {
|
||||
tracing::debug!(
|
||||
"Indexation allocated memory metrics - \
|
||||
Total BBQueue size: {total_bbbuffer_capacity}, \
|
||||
Total extractor memory: {:?}",
|
||||
grenad_parameters.max_memory,
|
||||
);
|
||||
});
|
||||
|
||||
let (extractor_sender, writer_receiver) = pool
|
||||
.install(|| extractor_writer_bbqueue(&mut bbbuffers, total_bbbuffer_capacity, 1000))
|
||||
.unwrap();
|
||||
@ -179,13 +190,16 @@ where
|
||||
|
||||
indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase);
|
||||
|
||||
build_vectors(
|
||||
index,
|
||||
wtxn,
|
||||
index_embeddings,
|
||||
&mut arroy_writers,
|
||||
&indexing_context.must_stop_processing,
|
||||
)?;
|
||||
pool.install(|| {
|
||||
build_vectors(
|
||||
index,
|
||||
wtxn,
|
||||
index_embeddings,
|
||||
&mut arroy_writers,
|
||||
&indexing_context.must_stop_processing,
|
||||
)
|
||||
})
|
||||
.unwrap()?;
|
||||
|
||||
post_processing::post_process(
|
||||
indexing_context,
|
||||
|
@ -72,11 +72,23 @@ pub(super) fn write_to_db(
|
||||
&mut aligned_embedding,
|
||||
)?;
|
||||
}
|
||||
|
||||
write_from_bbqueue(&mut writer_receiver, index, wtxn, arroy_writers, &mut aligned_embedding)?;
|
||||
|
||||
let direct_attempts = writer_receiver.sent_messages_attempts();
|
||||
let blocking_attempts = writer_receiver.blocking_sent_messages_attempts();
|
||||
let congestion_pct = (blocking_attempts as f64 / direct_attempts as f64) * 100.0;
|
||||
tracing::debug!(
|
||||
"Channel congestion metrics - \
|
||||
Attempts: {direct_attempts}, \
|
||||
Blocked attempts: {blocking_attempts} \
|
||||
({congestion_pct:.1}% congestion)"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::vectors")]
|
||||
#[tracing::instrument(level = "debug", skip_all, target = "indexing::vectors")]
|
||||
pub(super) fn build_vectors<MSP>(
|
||||
index: &Index,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
|
@ -1,7 +1,9 @@
|
||||
mod v1_12;
|
||||
mod v1_13;
|
||||
|
||||
use heed::RwTxn;
|
||||
use v1_12::{V1_12_3_To_Current, V1_12_To_V1_12_3};
|
||||
use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
|
||||
use v1_13::V1_13_0_To_Current;
|
||||
|
||||
use crate::progress::{Progress, VariableNameStep};
|
||||
use crate::{Index, InternalError, Result};
|
||||
@ -26,11 +28,13 @@ pub fn upgrade(
|
||||
progress: Progress,
|
||||
) -> Result<bool> {
|
||||
let from = index.get_version(wtxn)?.unwrap_or(db_version);
|
||||
let upgrade_functions: &[&dyn UpgradeIndex] = &[&V1_12_To_V1_12_3 {}, &V1_12_3_To_Current()];
|
||||
let upgrade_functions: &[&dyn UpgradeIndex] =
|
||||
&[&V1_12_To_V1_12_3 {}, &V1_12_3_To_V1_13_0 {}, &V1_13_0_To_Current()];
|
||||
|
||||
let start = match from {
|
||||
(1, 12, 0..=2) => 0,
|
||||
(1, 12, 3..) => 1,
|
||||
(1, 13, 0) => 2,
|
||||
// We must handle the current version in the match because in case of a failure some index may have been upgraded but not other.
|
||||
(1, 13, _) => return Ok(false),
|
||||
(major, minor, patch) => {
|
||||
|
@ -1,11 +1,9 @@
|
||||
use heed::RwTxn;
|
||||
|
||||
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
|
||||
use super::UpgradeIndex;
|
||||
use crate::progress::Progress;
|
||||
use crate::{make_enum_progress, Index, Result};
|
||||
|
||||
use super::UpgradeIndex;
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
pub(super) struct V1_12_To_V1_12_3 {}
|
||||
|
||||
@ -33,9 +31,9 @@ impl UpgradeIndex for V1_12_To_V1_12_3 {
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
pub(super) struct V1_12_3_To_Current();
|
||||
pub(super) struct V1_12_3_To_V1_13_0 {}
|
||||
|
||||
impl UpgradeIndex for V1_12_3_To_Current {
|
||||
impl UpgradeIndex for V1_12_3_To_V1_13_0 {
|
||||
fn upgrade(
|
||||
&self,
|
||||
_wtxn: &mut RwTxn,
|
||||
@ -43,14 +41,11 @@ impl UpgradeIndex for V1_12_3_To_Current {
|
||||
_original: (u32, u32, u32),
|
||||
_progress: Progress,
|
||||
) -> Result<bool> {
|
||||
Ok(false)
|
||||
// recompute the indexes stats
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn target_version(&self) -> (u32, u32, u32) {
|
||||
(
|
||||
VERSION_MAJOR.parse().unwrap(),
|
||||
VERSION_MINOR.parse().unwrap(),
|
||||
VERSION_PATCH.parse().unwrap(),
|
||||
)
|
||||
(1, 13, 0)
|
||||
}
|
||||
}
|
||||
|
29
crates/milli/src/update/upgrade/v1_13.rs
Normal file
29
crates/milli/src/update/upgrade/v1_13.rs
Normal file
@ -0,0 +1,29 @@
|
||||
use heed::RwTxn;
|
||||
|
||||
use super::UpgradeIndex;
|
||||
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
|
||||
use crate::progress::Progress;
|
||||
use crate::{Index, Result};
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
pub(super) struct V1_13_0_To_Current();
|
||||
|
||||
impl UpgradeIndex for V1_13_0_To_Current {
|
||||
fn upgrade(
|
||||
&self,
|
||||
_wtxn: &mut RwTxn,
|
||||
_index: &Index,
|
||||
_original: (u32, u32, u32),
|
||||
_progress: Progress,
|
||||
) -> Result<bool> {
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
fn target_version(&self) -> (u32, u32, u32) {
|
||||
(
|
||||
VERSION_MAJOR.parse().unwrap(),
|
||||
VERSION_MINOR.parse().unwrap(),
|
||||
VERSION_PATCH.parse().unwrap(),
|
||||
)
|
||||
}
|
||||
}
|
@ -410,8 +410,43 @@ impl ArroyWrapper {
|
||||
fn quantized_db(&self) -> arroy::Database<BinaryQuantizedCosine> {
|
||||
self.database.remap_data_type()
|
||||
}
|
||||
|
||||
pub fn aggregate_stats(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
stats: &mut ArroyStats,
|
||||
) -> Result<(), arroy::Error> {
|
||||
if self.quantized {
|
||||
for reader in self.readers(rtxn, self.quantized_db()) {
|
||||
let reader = reader?;
|
||||
let documents = reader.item_ids();
|
||||
if documents.is_empty() {
|
||||
break;
|
||||
}
|
||||
stats.documents |= documents;
|
||||
stats.number_of_embeddings += documents.len();
|
||||
}
|
||||
} else {
|
||||
for reader in self.readers(rtxn, self.angular_db()) {
|
||||
let reader = reader?;
|
||||
let documents = reader.item_ids();
|
||||
if documents.is_empty() {
|
||||
break;
|
||||
}
|
||||
stats.documents |= documents;
|
||||
stats.number_of_embeddings += documents.len();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct ArroyStats {
|
||||
pub number_of_embeddings: u64,
|
||||
pub documents: RoaringBitmap,
|
||||
}
|
||||
/// One or multiple embeddings stored consecutively in a flat vector.
|
||||
pub struct Embeddings<F> {
|
||||
data: Vec<F>,
|
||||
@ -611,6 +646,7 @@ impl Embedder {
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "debug", skip_all, target = "search")]
|
||||
pub fn embed_one(
|
||||
&self,
|
||||
text: String,
|
||||
|
@ -5,7 +5,7 @@ use rayon::slice::ParallelSlice as _;
|
||||
|
||||
use super::error::{EmbedError, EmbedErrorKind, NewEmbedderError, NewEmbedderErrorKind};
|
||||
use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions};
|
||||
use super::DistributionShift;
|
||||
use super::{DistributionShift, REQUEST_PARALLELISM};
|
||||
use crate::error::FaultSource;
|
||||
use crate::vector::Embedding;
|
||||
use crate::ThreadPoolNoAbort;
|
||||
@ -118,14 +118,20 @@ impl Embedder {
|
||||
text_chunks: Vec<Vec<String>>,
|
||||
threads: &ThreadPoolNoAbort,
|
||||
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
|
||||
threads
|
||||
.install(move || {
|
||||
text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect()
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||
fault: FaultSource::Bug,
|
||||
})?
|
||||
// This condition helps reduce the number of active rayon jobs
|
||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||
text_chunks.into_iter().map(move |chunk| self.embed(&chunk, None)).collect()
|
||||
} else {
|
||||
threads
|
||||
.install(move || {
|
||||
text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect()
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||
fault: FaultSource::Bug,
|
||||
})?
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn embed_chunks_ref(
|
||||
@ -133,20 +139,32 @@ impl Embedder {
|
||||
texts: &[&str],
|
||||
threads: &ThreadPoolNoAbort,
|
||||
) -> Result<Vec<Vec<f32>>, EmbedError> {
|
||||
threads
|
||||
.install(move || {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed(chunk, None))
|
||||
.collect();
|
||||
// This condition helps reduce the number of active rayon jobs
|
||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed(chunk, None))
|
||||
.collect();
|
||||
|
||||
let embeddings = embeddings?;
|
||||
Ok(embeddings.into_iter().flatten().collect())
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||
fault: FaultSource::Bug,
|
||||
})?
|
||||
let embeddings = embeddings?;
|
||||
Ok(embeddings.into_iter().flatten().collect())
|
||||
} else {
|
||||
threads
|
||||
.install(move || {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed(chunk, None))
|
||||
.collect();
|
||||
|
||||
let embeddings = embeddings?;
|
||||
Ok(embeddings.into_iter().flatten().collect())
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||
fault: FaultSource::Bug,
|
||||
})?
|
||||
}
|
||||
}
|
||||
|
||||
pub fn chunk_count_hint(&self) -> usize {
|
||||
|
@ -7,7 +7,7 @@ use rayon::slice::ParallelSlice as _;
|
||||
|
||||
use super::error::{EmbedError, NewEmbedderError};
|
||||
use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions};
|
||||
use super::DistributionShift;
|
||||
use super::{DistributionShift, REQUEST_PARALLELISM};
|
||||
use crate::error::FaultSource;
|
||||
use crate::vector::error::EmbedErrorKind;
|
||||
use crate::vector::Embedding;
|
||||
@ -255,14 +255,20 @@ impl Embedder {
|
||||
text_chunks: Vec<Vec<String>>,
|
||||
threads: &ThreadPoolNoAbort,
|
||||
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
|
||||
threads
|
||||
.install(move || {
|
||||
text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect()
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||
fault: FaultSource::Bug,
|
||||
})?
|
||||
// This condition helps reduce the number of active rayon jobs
|
||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||
text_chunks.into_iter().map(move |chunk| self.embed(&chunk, None)).collect()
|
||||
} else {
|
||||
threads
|
||||
.install(move || {
|
||||
text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect()
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||
fault: FaultSource::Bug,
|
||||
})?
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn embed_chunks_ref(
|
||||
@ -270,20 +276,31 @@ impl Embedder {
|
||||
texts: &[&str],
|
||||
threads: &ThreadPoolNoAbort,
|
||||
) -> Result<Vec<Vec<f32>>, EmbedError> {
|
||||
threads
|
||||
.install(move || {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed(chunk, None))
|
||||
.collect();
|
||||
// This condition helps reduce the number of active rayon jobs
|
||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed(chunk, None))
|
||||
.collect();
|
||||
let embeddings = embeddings?;
|
||||
Ok(embeddings.into_iter().flatten().collect())
|
||||
} else {
|
||||
threads
|
||||
.install(move || {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed(chunk, None))
|
||||
.collect();
|
||||
|
||||
let embeddings = embeddings?;
|
||||
Ok(embeddings.into_iter().flatten().collect())
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||
fault: FaultSource::Bug,
|
||||
})?
|
||||
let embeddings = embeddings?;
|
||||
Ok(embeddings.into_iter().flatten().collect())
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||
fault: FaultSource::Bug,
|
||||
})?
|
||||
}
|
||||
}
|
||||
|
||||
pub fn chunk_count_hint(&self) -> usize {
|
||||
|
@ -130,6 +130,7 @@ impl Embedder {
|
||||
let client = ureq::AgentBuilder::new()
|
||||
.max_idle_connections(REQUEST_PARALLELISM * 2)
|
||||
.max_idle_connections_per_host(REQUEST_PARALLELISM * 2)
|
||||
.timeout(std::time::Duration::from_secs(30))
|
||||
.build();
|
||||
|
||||
let request = Request::new(options.request)?;
|
||||
@ -188,14 +189,20 @@ impl Embedder {
|
||||
text_chunks: Vec<Vec<String>>,
|
||||
threads: &ThreadPoolNoAbort,
|
||||
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
|
||||
threads
|
||||
.install(move || {
|
||||
text_chunks.into_par_iter().map(move |chunk| self.embed(chunk, None)).collect()
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||
fault: FaultSource::Bug,
|
||||
})?
|
||||
// This condition helps reduce the number of active rayon jobs
|
||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||
text_chunks.into_iter().map(move |chunk| self.embed(chunk, None)).collect()
|
||||
} else {
|
||||
threads
|
||||
.install(move || {
|
||||
text_chunks.into_par_iter().map(move |chunk| self.embed(chunk, None)).collect()
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||
fault: FaultSource::Bug,
|
||||
})?
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn embed_chunks_ref(
|
||||
@ -203,20 +210,32 @@ impl Embedder {
|
||||
texts: &[&str],
|
||||
threads: &ThreadPoolNoAbort,
|
||||
) -> Result<Vec<Embedding>, EmbedError> {
|
||||
threads
|
||||
.install(move || {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed_ref(chunk, None))
|
||||
.collect();
|
||||
// This condition helps reduce the number of active rayon jobs
|
||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed_ref(chunk, None))
|
||||
.collect();
|
||||
|
||||
let embeddings = embeddings?;
|
||||
Ok(embeddings.into_iter().flatten().collect())
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||
fault: FaultSource::Bug,
|
||||
})?
|
||||
let embeddings = embeddings?;
|
||||
Ok(embeddings.into_iter().flatten().collect())
|
||||
} else {
|
||||
threads
|
||||
.install(move || {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed_ref(chunk, None))
|
||||
.collect();
|
||||
|
||||
let embeddings = embeddings?;
|
||||
Ok(embeddings.into_iter().flatten().collect())
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||
fault: FaultSource::Bug,
|
||||
})?
|
||||
}
|
||||
}
|
||||
|
||||
pub fn chunk_count_hint(&self) -> usize {
|
||||
|
@ -455,7 +455,7 @@ impl EmbeddingSettings {
|
||||
EmbedderSource::Ollama,
|
||||
EmbedderSource::Rest,
|
||||
],
|
||||
Self::DOCUMENT_TEMPLATE => &[
|
||||
Self::DOCUMENT_TEMPLATE | Self::DOCUMENT_TEMPLATE_MAX_BYTES => &[
|
||||
EmbedderSource::HuggingFace,
|
||||
EmbedderSource::OpenAi,
|
||||
EmbedderSource::Ollama,
|
||||
@ -490,6 +490,7 @@ impl EmbeddingSettings {
|
||||
Self::MODEL,
|
||||
Self::API_KEY,
|
||||
Self::DOCUMENT_TEMPLATE,
|
||||
Self::DOCUMENT_TEMPLATE_MAX_BYTES,
|
||||
Self::DIMENSIONS,
|
||||
Self::DISTRIBUTION,
|
||||
Self::URL,
|
||||
@ -500,6 +501,7 @@ impl EmbeddingSettings {
|
||||
Self::MODEL,
|
||||
Self::REVISION,
|
||||
Self::DOCUMENT_TEMPLATE,
|
||||
Self::DOCUMENT_TEMPLATE_MAX_BYTES,
|
||||
Self::DISTRIBUTION,
|
||||
Self::BINARY_QUANTIZED,
|
||||
],
|
||||
@ -507,6 +509,7 @@ impl EmbeddingSettings {
|
||||
Self::SOURCE,
|
||||
Self::MODEL,
|
||||
Self::DOCUMENT_TEMPLATE,
|
||||
Self::DOCUMENT_TEMPLATE_MAX_BYTES,
|
||||
Self::URL,
|
||||
Self::API_KEY,
|
||||
Self::DIMENSIONS,
|
||||
@ -521,6 +524,7 @@ impl EmbeddingSettings {
|
||||
Self::API_KEY,
|
||||
Self::DIMENSIONS,
|
||||
Self::DOCUMENT_TEMPLATE,
|
||||
Self::DOCUMENT_TEMPLATE_MAX_BYTES,
|
||||
Self::URL,
|
||||
Self::REQUEST,
|
||||
Self::RESPONSE,
|
||||
|
@ -12,16 +12,6 @@
|
||||
}
|
||||
},
|
||||
"precommands": [
|
||||
{
|
||||
"route": "experimental-features",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"vectorStore": true
|
||||
}
|
||||
},
|
||||
"synchronous": "DontWait"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/settings",
|
||||
"method": "PATCH",
|
||||
|
@ -12,16 +12,6 @@
|
||||
}
|
||||
},
|
||||
"precommands": [
|
||||
{
|
||||
"route": "experimental-features",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"vectorStore": true
|
||||
}
|
||||
},
|
||||
"synchronous": "DontWait"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/settings",
|
||||
"method": "PATCH",
|
||||
|
@ -28,10 +28,10 @@
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
|
||||
"sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
|
||||
},
|
||||
"hackernews-02-modified-filters.ndjson": {
|
||||
"hackernews-modified-number-filters.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-filters.ndjson",
|
||||
"sha256": "7272cbfd41110d32d7fe168424a0000f07589bfe40f664652b34f4f20aaf3802"
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-filters.ndjson",
|
||||
"sha256": "b80c245ce1b1df80b9b38800f677f3bd11947ebc62716fb108269d50e796c35c"
|
||||
}
|
||||
},
|
||||
"precommands": [
|
||||
@ -102,7 +102,7 @@
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-02-modified-filters.ndjson"
|
||||
"asset": "hackernews-modified-number-filters.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
|
@ -28,10 +28,10 @@
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
|
||||
"sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
|
||||
},
|
||||
"hackernews-01-modified-filters.ndjson": {
|
||||
"hackernews-modified-string-filters.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-filters.ndjson",
|
||||
"sha256": "b80c245ce1b1df80b9b38800f677f3bd11947ebc62716fb108269d50e796c35c"
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-filters.ndjson",
|
||||
"sha256": "7272cbfd41110d32d7fe168424a0000f07589bfe40f664652b34f4f20aaf3802"
|
||||
}
|
||||
},
|
||||
"precommands": [
|
||||
@ -102,7 +102,7 @@
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-01-modified-filters.ndjson"
|
||||
"asset": "hackernews-modified-string-filters.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user