mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
write the dump export
This commit is contained in:
parent
8954b1bd1d
commit
b7f9c94f4a
25 changed files with 686 additions and 184 deletions
|
@ -10,6 +10,7 @@ csv = "1.1.6"
|
|||
either = { version = "1.6.1", features = ["serde"] }
|
||||
milli = { git = "https://github.com/meilisearch/milli.git", branch = "indexation-abortion", default-features = false }
|
||||
enum-iterator = "0.7.0"
|
||||
fst = "0.4.7"
|
||||
proptest = { version = "1.0.0", optional = true }
|
||||
proptest-derive = { version = "0.3.0", optional = true }
|
||||
roaring = { version = "0.10.0", features = ["serde"] }
|
||||
|
|
|
@ -9,5 +9,7 @@ pub mod tasks;
|
|||
pub use milli;
|
||||
pub use milli::heed;
|
||||
pub use milli::Index;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub type Document = serde_json::Map<String, serde_json::Value>;
|
||||
pub type InstanceUid = Uuid;
|
||||
|
|
|
@ -2,9 +2,15 @@ use std::collections::{BTreeMap, BTreeSet};
|
|||
use std::marker::PhantomData;
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
use fst::IntoStreamer;
|
||||
use milli::update::Setting;
|
||||
use milli::{Index, DEFAULT_VALUES_PER_FACET};
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
|
||||
/// The maximimum number of results that the engine
|
||||
/// will be able to return in one search call.
|
||||
pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;
|
||||
|
||||
fn serialize_with_wildcard<S>(
|
||||
field: &Setting<Vec<String>>,
|
||||
s: S,
|
||||
|
@ -366,6 +372,114 @@ pub fn apply_settings_to_builder(
|
|||
}
|
||||
}
|
||||
|
||||
pub fn settings(
|
||||
index: &Index,
|
||||
rtxn: &crate::heed::RoTxn,
|
||||
) -> Result<Settings<Checked>, milli::Error> {
|
||||
let displayed_attributes = index
|
||||
.displayed_fields(rtxn)?
|
||||
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||
|
||||
let searchable_attributes = index
|
||||
.user_defined_searchable_fields(rtxn)?
|
||||
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||
|
||||
let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
|
||||
|
||||
let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();
|
||||
|
||||
let criteria = index
|
||||
.criteria(rtxn)?
|
||||
.into_iter()
|
||||
.map(|c| c.to_string())
|
||||
.collect();
|
||||
|
||||
let stop_words = index
|
||||
.stop_words(rtxn)?
|
||||
.map(|stop_words| -> Result<BTreeSet<_>, milli::Error> {
|
||||
Ok(stop_words.stream().into_strs()?.into_iter().collect())
|
||||
})
|
||||
.transpose()?
|
||||
.unwrap_or_default();
|
||||
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
|
||||
|
||||
// in milli each word in the synonyms map were split on their separator. Since we lost
|
||||
// this information we are going to put space between words.
|
||||
let synonyms = index
|
||||
.synonyms(rtxn)?
|
||||
.iter()
|
||||
.map(|(key, values)| {
|
||||
(
|
||||
key.join(" "),
|
||||
values.iter().map(|value| value.join(" ")).collect(),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let min_typo_word_len = MinWordSizeTyposSetting {
|
||||
one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
|
||||
two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?),
|
||||
};
|
||||
|
||||
let disabled_words = match index.exact_words(rtxn)? {
|
||||
Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(),
|
||||
None => BTreeSet::new(),
|
||||
};
|
||||
|
||||
let disabled_attributes = index
|
||||
.exact_attributes(rtxn)?
|
||||
.into_iter()
|
||||
.map(String::from)
|
||||
.collect();
|
||||
|
||||
let typo_tolerance = TypoSettings {
|
||||
enabled: Setting::Set(index.authorize_typos(rtxn)?),
|
||||
min_word_size_for_typos: Setting::Set(min_typo_word_len),
|
||||
disable_on_words: Setting::Set(disabled_words),
|
||||
disable_on_attributes: Setting::Set(disabled_attributes),
|
||||
};
|
||||
|
||||
let faceting = FacetingSettings {
|
||||
max_values_per_facet: Setting::Set(
|
||||
index
|
||||
.max_values_per_facet(rtxn)?
|
||||
.unwrap_or(DEFAULT_VALUES_PER_FACET),
|
||||
),
|
||||
};
|
||||
|
||||
let pagination = PaginationSettings {
|
||||
max_total_hits: Setting::Set(
|
||||
index
|
||||
.pagination_max_total_hits(rtxn)?
|
||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
|
||||
),
|
||||
};
|
||||
|
||||
Ok(Settings {
|
||||
displayed_attributes: match displayed_attributes {
|
||||
Some(attrs) => Setting::Set(attrs),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
searchable_attributes: match searchable_attributes {
|
||||
Some(attrs) => Setting::Set(attrs),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
filterable_attributes: Setting::Set(filterable_attributes),
|
||||
sortable_attributes: Setting::Set(sortable_attributes),
|
||||
ranking_rules: Setting::Set(criteria),
|
||||
stop_words: Setting::Set(stop_words),
|
||||
distinct_attribute: match distinct_field {
|
||||
Some(field) => Setting::Set(field),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
synonyms: Setting::Set(synonyms),
|
||||
typo_tolerance: Setting::Set(typo_tolerance),
|
||||
faceting: Setting::Set(faceting),
|
||||
pagination: Setting::Set(pagination),
|
||||
_kind: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test {
|
||||
use proptest::prelude::*;
|
||||
|
|
|
@ -3,7 +3,6 @@ use roaring::RoaringBitmap;
|
|||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use std::{
|
||||
fmt::{Display, Write},
|
||||
path::PathBuf,
|
||||
str::FromStr,
|
||||
};
|
||||
use time::{Duration, OffsetDateTime};
|
||||
|
@ -11,7 +10,9 @@ use uuid::Uuid;
|
|||
|
||||
use crate::{
|
||||
error::{Code, ResponseError},
|
||||
keys::Key,
|
||||
settings::{Settings, Unchecked},
|
||||
InstanceUid,
|
||||
};
|
||||
|
||||
pub type TaskId = u32;
|
||||
|
@ -71,6 +72,26 @@ impl Task {
|
|||
IndexSwap { lhs, rhs } => Some(vec![lhs, rhs]),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the content-uuid if there is one
|
||||
pub fn content_uuid(&self) -> Option<&Uuid> {
|
||||
match self.kind {
|
||||
KindWithContent::DocumentImport {
|
||||
ref content_file, ..
|
||||
} => Some(content_file),
|
||||
KindWithContent::DocumentDeletion { .. }
|
||||
| KindWithContent::DocumentClear { .. }
|
||||
| KindWithContent::Settings { .. }
|
||||
| KindWithContent::IndexDeletion { .. }
|
||||
| KindWithContent::IndexCreation { .. }
|
||||
| KindWithContent::IndexUpdate { .. }
|
||||
| KindWithContent::IndexSwap { .. }
|
||||
| KindWithContent::CancelTask { .. }
|
||||
| KindWithContent::DeleteTasks { .. }
|
||||
| KindWithContent::DumpExport { .. }
|
||||
| KindWithContent::Snapshot => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
|
@ -120,7 +141,9 @@ pub enum KindWithContent {
|
|||
tasks: RoaringBitmap,
|
||||
},
|
||||
DumpExport {
|
||||
output: PathBuf,
|
||||
dump_uid: String,
|
||||
keys: Vec<Key>,
|
||||
instance_uid: Option<InstanceUid>,
|
||||
},
|
||||
Snapshot,
|
||||
}
|
||||
|
@ -167,7 +190,7 @@ impl KindWithContent {
|
|||
documents_count, ..
|
||||
} => Some(Details::DocumentAddition {
|
||||
received_documents: *documents_count,
|
||||
indexed_documents: 0,
|
||||
indexed_documents: Some(0),
|
||||
}),
|
||||
KindWithContent::DocumentDeletion {
|
||||
index_uid: _,
|
||||
|
@ -204,6 +227,38 @@ impl KindWithContent {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<&KindWithContent> for Option<Details> {
|
||||
fn from(kind: &KindWithContent) -> Self {
|
||||
match kind {
|
||||
KindWithContent::DocumentImport {
|
||||
documents_count, ..
|
||||
} => Some(Details::DocumentAddition {
|
||||
received_documents: *documents_count,
|
||||
indexed_documents: None,
|
||||
}),
|
||||
KindWithContent::DocumentDeletion { .. } => None,
|
||||
KindWithContent::DocumentClear { .. } => None,
|
||||
KindWithContent::Settings { new_settings, .. } => Some(Details::Settings {
|
||||
settings: new_settings.clone(),
|
||||
}),
|
||||
KindWithContent::IndexDeletion { .. } => None,
|
||||
KindWithContent::IndexCreation { primary_key, .. } => Some(Details::IndexInfo {
|
||||
primary_key: primary_key.clone(),
|
||||
}),
|
||||
KindWithContent::IndexUpdate { primary_key, .. } => Some(Details::IndexInfo {
|
||||
primary_key: primary_key.clone(),
|
||||
}),
|
||||
KindWithContent::IndexSwap { .. } => None,
|
||||
KindWithContent::CancelTask { .. } => None,
|
||||
KindWithContent::DeleteTasks { .. } => todo!(),
|
||||
KindWithContent::DumpExport { dump_uid, .. } => Some(Details::Dump {
|
||||
dump_uid: dump_uid.clone(),
|
||||
}),
|
||||
KindWithContent::Snapshot => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum Status {
|
||||
|
@ -289,7 +344,7 @@ impl FromStr for Kind {
|
|||
pub enum Details {
|
||||
DocumentAddition {
|
||||
received_documents: u64,
|
||||
indexed_documents: u64,
|
||||
indexed_documents: Option<u64>,
|
||||
},
|
||||
Settings {
|
||||
settings: Settings<Unchecked>,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue