mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-22 03:07:27 +01:00
Merge remote-tracking branch 'milli/main' into bring-v1-changes
This commit is contained in:
commit
fbec48f56e
31
Cargo.lock
generated
31
Cargo.lock
generated
@ -1108,40 +1108,17 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deserr"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "86290491a2b5c21a1a5083da8dae831006761258fabd5617309c3eebc5f89468"
|
||||
dependencies = [
|
||||
"deserr-internal 0.1.4",
|
||||
"serde-cs",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deserr"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "28380303ca15ec07e1d5b079baf19cf849b09edad5cab219c1c51b2bd07523de"
|
||||
dependencies = [
|
||||
"deserr-internal 0.3.0",
|
||||
"deserr-internal",
|
||||
"serde-cs",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deserr-internal"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7131de1c27581bc376a22166c9f570be91b76cb096be2f6aecf224c27bf7c49a"
|
||||
dependencies = [
|
||||
"convert_case 0.5.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deserr-internal"
|
||||
version = "0.3.0"
|
||||
@ -2477,7 +2454,7 @@ dependencies = [
|
||||
"cargo_toml",
|
||||
"clap 4.0.32",
|
||||
"crossbeam-channel",
|
||||
"deserr 0.3.0",
|
||||
"deserr",
|
||||
"dump",
|
||||
"either",
|
||||
"env_logger",
|
||||
@ -2569,7 +2546,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"convert_case 0.6.0",
|
||||
"csv",
|
||||
"deserr 0.3.0",
|
||||
"deserr",
|
||||
"either",
|
||||
"enum-iterator",
|
||||
"file-store",
|
||||
@ -2628,7 +2605,7 @@ dependencies = [
|
||||
"concat-arrays",
|
||||
"crossbeam-channel",
|
||||
"csv",
|
||||
"deserr 0.1.4",
|
||||
"deserr",
|
||||
"either",
|
||||
"filter-parser",
|
||||
"flatten-serde-json",
|
||||
|
@ -12,7 +12,7 @@ byteorder = "1.4.3"
|
||||
charabia = { version = "0.7.0", default-features = false }
|
||||
concat-arrays = "0.1.2"
|
||||
crossbeam-channel = "0.5.6"
|
||||
deserr = "0.1.4"
|
||||
deserr = "0.3.0"
|
||||
either = "1.8.0"
|
||||
flatten-serde-json = { path = "../flatten-serde-json" }
|
||||
fst = "0.4.7"
|
||||
|
@ -1,5 +1,6 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::convert::Infallible;
|
||||
use std::fmt::Write;
|
||||
use std::{io, str};
|
||||
|
||||
use heed::{Error as HeedError, MdbError};
|
||||
@ -100,10 +101,11 @@ A document identifier can be of type integer or string, \
|
||||
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).", .document_id.to_string()
|
||||
)]
|
||||
InvalidDocumentId { document_id: Value },
|
||||
#[error("Invalid facet distribution, the fields `{}` are not set as filterable.",
|
||||
.invalid_facets_name.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
|
||||
)]
|
||||
InvalidFacetsDistribution { invalid_facets_name: BTreeSet<String> },
|
||||
#[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))]
|
||||
InvalidFacetsDistribution {
|
||||
invalid_facets_name: BTreeSet<String>,
|
||||
valid_facets_name: BTreeSet<String>,
|
||||
},
|
||||
#[error(transparent)]
|
||||
InvalidGeoField(#[from] GeoError),
|
||||
#[error("{0}")]
|
||||
@ -152,6 +154,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
||||
pub enum GeoError {
|
||||
#[error("The `_geo` field in the document with the id: `{document_id}` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `{value}`.")]
|
||||
NotAnObject { document_id: Value, value: Value },
|
||||
#[error("The `_geo` field in the document with the id: `{document_id}` contains the following unexpected fields: `{value}`.")]
|
||||
UnexpectedExtraFields { document_id: Value, value: Value },
|
||||
#[error("Could not find latitude nor longitude in the document with the id: `{document_id}`. Was expecting `_geo.lat` and `_geo.lng` fields.")]
|
||||
MissingLatitudeAndLongitude { document_id: Value },
|
||||
#[error("Could not find latitude in the document with the id: `{document_id}`. Was expecting a `_geo.lat` field.")]
|
||||
@ -166,6 +170,50 @@ pub enum GeoError {
|
||||
BadLongitude { document_id: Value, value: Value },
|
||||
}
|
||||
|
||||
fn format_invalid_filter_distribution(
|
||||
invalid_facets_name: &BTreeSet<String>,
|
||||
valid_facets_name: &BTreeSet<String>,
|
||||
) -> String {
|
||||
if valid_facets_name.is_empty() {
|
||||
return "this index does not have configured filterable attributes.".into();
|
||||
}
|
||||
|
||||
let mut result = String::new();
|
||||
|
||||
match invalid_facets_name.len() {
|
||||
0 => (),
|
||||
1 => write!(
|
||||
result,
|
||||
"attribute `{}` is not filterable.",
|
||||
invalid_facets_name.first().unwrap()
|
||||
)
|
||||
.unwrap(),
|
||||
_ => write!(
|
||||
result,
|
||||
"attributes `{}` are not filterable.",
|
||||
invalid_facets_name.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
|
||||
)
|
||||
.unwrap(),
|
||||
};
|
||||
|
||||
match valid_facets_name.len() {
|
||||
1 => write!(
|
||||
result,
|
||||
" The available filterable attribute is `{}`.",
|
||||
valid_facets_name.first().unwrap()
|
||||
)
|
||||
.unwrap(),
|
||||
_ => write!(
|
||||
result,
|
||||
" The available filterable attributes are `{}`.",
|
||||
valid_facets_name.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
|
||||
)
|
||||
.unwrap(),
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// A little macro helper to autogenerate From implementation that needs two `Into`.
|
||||
/// Given the following parameters: `error_from_sub_error!(FieldIdMapMissingEntry => InternalError)`
|
||||
/// the macro will create the following code:
|
||||
|
@ -426,7 +426,7 @@ impl Index {
|
||||
}
|
||||
|
||||
/// Returns the `rtree` which associates coordinates to documents ids.
|
||||
pub fn geo_rtree(&self, rtxn: &'_ RoTxn) -> Result<Option<RTree<GeoPoint>>> {
|
||||
pub fn geo_rtree(&self, rtxn: &RoTxn) -> Result<Option<RTree<GeoPoint>>> {
|
||||
match self
|
||||
.main
|
||||
.get::<_, Str, SerdeBincode<RTree<GeoPoint>>>(rtxn, main_key::GEO_RTREE_KEY)?
|
||||
@ -2292,4 +2292,69 @@ pub(crate) mod tests {
|
||||
assert!(all_ids.insert(id));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bug_3007() {
|
||||
// https://github.com/meilisearch/meilisearch/issues/3007
|
||||
|
||||
use crate::error::{GeoError, UserError};
|
||||
let index = TempIndex::new();
|
||||
|
||||
// Given is an index with a geo field NOT contained in the sortable_fields of the settings
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key("id".to_string());
|
||||
settings.set_filterable_fields(HashSet::from(["_geo".to_string()]));
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
// happy path
|
||||
index.add_documents(documents!({ "id" : 5, "_geo": {"lat": 12.0, "lng": 11.0}})).unwrap();
|
||||
|
||||
db_snap!(index, geo_faceted_documents_ids);
|
||||
|
||||
// both are unparseable, we expect GeoError::BadLatitudeAndLongitude
|
||||
let err1 = index
|
||||
.add_documents(
|
||||
documents!({ "id" : 6, "_geo": {"lat": "unparseable", "lng": "unparseable"}}),
|
||||
)
|
||||
.unwrap_err();
|
||||
assert!(matches!(
|
||||
err1,
|
||||
Error::UserError(UserError::InvalidGeoField(GeoError::BadLatitudeAndLongitude { .. }))
|
||||
));
|
||||
|
||||
db_snap!(index, geo_faceted_documents_ids); // ensure that no more document was inserted
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unexpected_extra_fields_in_geo_field() {
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key("id".to_string());
|
||||
settings.set_filterable_fields(HashSet::from(["_geo".to_string()]));
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let err = index
|
||||
.add_documents(
|
||||
documents!({ "id" : "doggo", "_geo": { "lat": 1, "lng": 2, "doggo": "are the best" }}),
|
||||
)
|
||||
.unwrap_err();
|
||||
insta::assert_display_snapshot!(err, @r###"The `_geo` field in the document with the id: `"\"doggo\""` contains the following unexpected fields: `{"doggo":"are the best"}`."###);
|
||||
|
||||
db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted
|
||||
|
||||
// multiple fields and complex values
|
||||
let err = index
|
||||
.add_documents(
|
||||
documents!({ "id" : "doggo", "_geo": { "lat": 1, "lng": 2, "doggo": "are the best", "and": { "all": ["cats", { "are": "beautiful" } ] } } }),
|
||||
)
|
||||
.unwrap_err();
|
||||
insta::assert_display_snapshot!(err, @r###"The `_geo` field in the document with the id: `"\"doggo\""` contains the following unexpected fields: `{"and":{"all":["cats",{"are":"beautiful"}]},"doggo":"are the best"}`."###);
|
||||
|
||||
db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted
|
||||
}
|
||||
}
|
||||
|
@ -183,14 +183,14 @@ impl<'t> Criterion for Proximity<'t> {
|
||||
}
|
||||
|
||||
fn resolve_candidates(
|
||||
ctx: &'_ dyn Context,
|
||||
ctx: &dyn Context,
|
||||
query_tree: &Operation,
|
||||
proximity: u8,
|
||||
cache: &mut Cache,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<RoaringBitmap> {
|
||||
fn resolve_operation(
|
||||
ctx: &'_ dyn Context,
|
||||
ctx: &dyn Context,
|
||||
query_tree: &Operation,
|
||||
proximity: u8,
|
||||
cache: &mut Cache,
|
||||
@ -244,7 +244,7 @@ fn resolve_candidates(
|
||||
}
|
||||
|
||||
fn mdfs_pair(
|
||||
ctx: &'_ dyn Context,
|
||||
ctx: &dyn Context,
|
||||
left: &Operation,
|
||||
right: &Operation,
|
||||
proximity: u8,
|
||||
@ -299,7 +299,7 @@ fn resolve_candidates(
|
||||
}
|
||||
|
||||
fn mdfs(
|
||||
ctx: &'_ dyn Context,
|
||||
ctx: &dyn Context,
|
||||
branches: &[Operation],
|
||||
proximity: u8,
|
||||
cache: &mut Cache,
|
||||
|
@ -240,14 +240,14 @@ fn alterate_query_tree(
|
||||
}
|
||||
|
||||
fn resolve_candidates(
|
||||
ctx: &'_ dyn Context,
|
||||
ctx: &dyn Context,
|
||||
query_tree: &Operation,
|
||||
number_typos: u8,
|
||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<RoaringBitmap> {
|
||||
fn resolve_operation(
|
||||
ctx: &'_ dyn Context,
|
||||
ctx: &dyn Context,
|
||||
query_tree: &Operation,
|
||||
number_typos: u8,
|
||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||
@ -277,7 +277,7 @@ fn resolve_candidates(
|
||||
}
|
||||
|
||||
fn mdfs(
|
||||
ctx: &'_ dyn Context,
|
||||
ctx: &dyn Context,
|
||||
branches: &[Operation],
|
||||
mana: u8,
|
||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||
|
@ -291,6 +291,7 @@ impl<'a> FacetDistribution<'a> {
|
||||
if !invalid_fields.is_empty() {
|
||||
return Err(UserError::InvalidFacetsDistribution {
|
||||
invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
|
||||
valid_facets_name: filterable_fields.into_iter().collect(),
|
||||
}
|
||||
.into());
|
||||
} else {
|
||||
|
@ -0,0 +1,4 @@
|
||||
---
|
||||
source: milli/src/index.rs
|
||||
---
|
||||
[0, ]
|
@ -0,0 +1,4 @@
|
||||
---
|
||||
source: milli/src/index.rs
|
||||
---
|
||||
[]
|
@ -575,8 +575,8 @@ fn remove_from_word_docids(
|
||||
}
|
||||
|
||||
fn remove_docids_from_field_id_docid_facet_value(
|
||||
index: &'_ Index,
|
||||
wtxn: &'_ mut heed::RwTxn,
|
||||
index: &Index,
|
||||
wtxn: &mut heed::RwTxn,
|
||||
facet_type: FacetType,
|
||||
field_id: FieldId,
|
||||
to_remove: &RoaringBitmap,
|
||||
|
@ -159,7 +159,7 @@ impl FacetsUpdateIncrementalInner {
|
||||
/// See documentation of `insert_in_level`
|
||||
fn insert_in_level_0(
|
||||
&self,
|
||||
txn: &'_ mut RwTxn,
|
||||
txn: &mut RwTxn,
|
||||
field_id: u16,
|
||||
facet_value: &[u8],
|
||||
docids: &RoaringBitmap,
|
||||
@ -213,7 +213,7 @@ impl FacetsUpdateIncrementalInner {
|
||||
/// of the parent node should be incremented.
|
||||
fn insert_in_level(
|
||||
&self,
|
||||
txn: &'_ mut RwTxn,
|
||||
txn: &mut RwTxn,
|
||||
field_id: u16,
|
||||
level: u8,
|
||||
facet_value: &[u8],
|
||||
@ -350,7 +350,7 @@ impl FacetsUpdateIncrementalInner {
|
||||
/// Insert the given facet value and corresponding document ids in the database.
|
||||
pub fn insert(
|
||||
&self,
|
||||
txn: &'_ mut RwTxn,
|
||||
txn: &mut RwTxn,
|
||||
field_id: u16,
|
||||
facet_value: &[u8],
|
||||
docids: &RoaringBitmap,
|
||||
@ -472,7 +472,7 @@ impl FacetsUpdateIncrementalInner {
|
||||
/// its left bound as well.
|
||||
fn delete_in_level(
|
||||
&self,
|
||||
txn: &'_ mut RwTxn,
|
||||
txn: &mut RwTxn,
|
||||
field_id: u16,
|
||||
level: u8,
|
||||
facet_value: &[u8],
|
||||
@ -531,7 +531,7 @@ impl FacetsUpdateIncrementalInner {
|
||||
|
||||
fn delete_in_level_0(
|
||||
&self,
|
||||
txn: &'_ mut RwTxn,
|
||||
txn: &mut RwTxn,
|
||||
field_id: u16,
|
||||
facet_value: &[u8],
|
||||
docids: &RoaringBitmap,
|
||||
@ -559,7 +559,7 @@ impl FacetsUpdateIncrementalInner {
|
||||
|
||||
pub fn delete(
|
||||
&self,
|
||||
txn: &'_ mut RwTxn,
|
||||
txn: &mut RwTxn,
|
||||
field_id: u16,
|
||||
facet_value: &[u8],
|
||||
docids: &RoaringBitmap,
|
||||
|
@ -98,7 +98,12 @@ pub fn enrich_documents_batch<R: Read + Seek>(
|
||||
// If the settings specifies that a _geo field must be used therefore we must check the
|
||||
// validity of it in all the documents of this batch and this is when we return `Some`.
|
||||
let geo_field_id = match documents_batch_index.id("_geo") {
|
||||
Some(geo_field_id) if index.sortable_fields(rtxn)?.contains("_geo") => Some(geo_field_id),
|
||||
Some(geo_field_id)
|
||||
if index.sortable_fields(rtxn)?.contains("_geo")
|
||||
|| index.filterable_fields(rtxn)?.contains("_geo") =>
|
||||
{
|
||||
Some(geo_field_id)
|
||||
}
|
||||
_otherwise => None,
|
||||
};
|
||||
|
||||
@ -367,11 +372,17 @@ pub fn extract_finite_float_from_value(value: Value) -> StdResult<f64, Value> {
|
||||
|
||||
pub fn validate_geo_from_json(id: &DocumentId, bytes: &[u8]) -> Result<StdResult<(), GeoError>> {
|
||||
use GeoError::*;
|
||||
let debug_id = || Value::from(id.debug());
|
||||
let debug_id = || {
|
||||
serde_json::from_slice(id.value().as_bytes()).unwrap_or_else(|_| Value::from(id.debug()))
|
||||
};
|
||||
match serde_json::from_slice(bytes).map_err(InternalError::SerdeJson)? {
|
||||
Value::Object(mut object) => match (object.remove("lat"), object.remove("lng")) {
|
||||
(Some(lat), Some(lng)) => {
|
||||
match (extract_finite_float_from_value(lat), extract_finite_float_from_value(lng)) {
|
||||
(Ok(_), Ok(_)) if !object.is_empty() => Ok(Err(UnexpectedExtraFields {
|
||||
document_id: debug_id(),
|
||||
value: object.into(),
|
||||
})),
|
||||
(Ok(_), Ok(_)) => Ok(Ok(())),
|
||||
(Err(value), Ok(_)) => Ok(Err(BadLatitude { document_id: debug_id(), value })),
|
||||
(Ok(_), Err(value)) => Ok(Err(BadLongitude { document_id: debug_id(), value })),
|
||||
|
@ -965,34 +965,6 @@ mod tests {
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn index_all_flavour_of_geo() {
|
||||
let mut index = TempIndex::new();
|
||||
index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments;
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset!(S("_geo")));
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{ "id": 0, "_geo": { "lat": 31, "lng": [42] } },
|
||||
{ "id": 1, "_geo": { "lat": "31" }, "_geo.lng": 42 },
|
||||
{ "id": 2, "_geo": { "lng": "42" }, "_geo.lat": "31" },
|
||||
{ "id": 3, "_geo.lat": 31, "_geo.lng": "42" },
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let mut search = crate::Search::new(&rtxn, &index);
|
||||
search.filter(crate::Filter::from_str("_geoRadius(31, 42, 0.000001)").unwrap().unwrap());
|
||||
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||
assert_eq!(documents_ids, vec![0, 1, 2, 3]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn geo_error() {
|
||||
let mut index = TempIndex::new();
|
||||
|
@ -37,9 +37,6 @@ where
|
||||
_ => T::deserialize_from_value(value, location).map(Setting::Set),
|
||||
}
|
||||
}
|
||||
fn default() -> Option<Self> {
|
||||
Some(Self::NotSet)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Default for Setting<T> {
|
||||
|
@ -140,16 +140,20 @@ impl<'t, 'u, 'i> WordPrefixPositionDocids<'t, 'u, 'i> {
|
||||
|
||||
// We remove all the entries that are no more required in this word prefix position
|
||||
// docids database.
|
||||
let mut iter =
|
||||
self.index.word_prefix_position_docids.iter_mut(self.wtxn)?.lazily_decode_data();
|
||||
while let Some(((prefix, _), _)) = iter.next().transpose()? {
|
||||
if del_prefix_fst_words.contains(prefix.as_bytes()) {
|
||||
unsafe { iter.del_current()? };
|
||||
// We also avoid iterating over the whole `word_prefix_position_docids` database if we know in
|
||||
// advance that the `if del_prefix_fst_words.contains(prefix.as_bytes()) {` condition below
|
||||
// will always be false (i.e. if `del_prefix_fst_words` is empty).
|
||||
if !del_prefix_fst_words.is_empty() {
|
||||
let mut iter =
|
||||
self.index.word_prefix_position_docids.iter_mut(self.wtxn)?.lazily_decode_data();
|
||||
while let Some(((prefix, _), _)) = iter.next().transpose()? {
|
||||
if del_prefix_fst_words.contains(prefix.as_bytes()) {
|
||||
unsafe { iter.del_current()? };
|
||||
}
|
||||
}
|
||||
drop(iter);
|
||||
}
|
||||
|
||||
drop(iter);
|
||||
|
||||
// We finally write all the word prefix position docids into the LMDB database.
|
||||
sorter_into_lmdb_database(
|
||||
self.wtxn,
|
||||
|
Loading…
x
Reference in New Issue
Block a user