diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs index a40eac02c..051ae776b 100644 --- a/crates/index-scheduler/src/batch.rs +++ b/crates/index-scheduler/src/batch.rs @@ -29,16 +29,13 @@ use bumpalo::Bump; use dump::IndexMetadata; use meilisearch_types::batches::BatchId; use meilisearch_types::heed::{RoTxn, RwTxn}; +use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey}; use meilisearch_types::milli::heed::CompactionOption; use meilisearch_types::milli::progress::Progress; use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction}; -use meilisearch_types::milli::update::{ - DocumentAdditionResult, IndexDocumentsMethod, Settings as MilliSettings, -}; -use meilisearch_types::milli::vector::parsed_vectors::{ - ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME, -}; +use meilisearch_types::milli::update::{DocumentAdditionResult, IndexDocumentsMethod, Settings as MilliSettings}; +use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder}; use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 00285c4ef..b48266b6a 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -1209,8 +1209,7 @@ impl<'a> HitMaker<'a> { .displayed_fields_ids(rtxn)? .map(|fields| fields.into_iter().collect::>()); - let vectors_fid = - fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME); + let vectors_fid = fields_ids_map.id(milli::constants::RESERVED_VECTORS_FIELD_NAME); let vectors_is_hidden = match (&displayed_ids, vectors_fid) { // displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid @@ -1219,8 +1218,7 @@ impl<'a> HitMaker<'a> { (Some(_), None) => { // unwrap as otherwise we'd go to the first one let displayed_names = index.displayed_fields(rtxn)?.unwrap(); - !displayed_names - .contains(&milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME) + !displayed_names.contains(&milli::constants::RESERVED_VECTORS_FIELD_NAME) } // displayed_ids is a finit list, so hide if `_vectors` is not part of it (Some(map), Some(vectors_fid)) => map.contains(&vectors_fid), diff --git a/crates/milli/src/asc_desc.rs b/crates/milli/src/asc_desc.rs index bde0dd440..c04748036 100644 --- a/crates/milli/src/asc_desc.rs +++ b/crates/milli/src/asc_desc.rs @@ -6,6 +6,7 @@ use std::str::FromStr; use serde::{Deserialize, Serialize}; use thiserror::Error; +use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::error::is_reserved_keyword; use crate::search::facet::BadGeoError; use crate::{CriterionError, Error, UserError}; @@ -175,7 +176,7 @@ impl From for SortError { AscDescError::ReservedKeyword { name } if name.starts_with("_geoPoint") => { SortError::BadGeoPointUsage { name } } - AscDescError::ReservedKeyword { name } if &name == "_geo" => { + AscDescError::ReservedKeyword { name } if &name == RESERVED_GEO_FIELD_NAME => { SortError::ReservedNameForSettings { name } } AscDescError::ReservedKeyword { name } if name.starts_with("_geoRadius") => { diff --git a/crates/milli/src/constants.rs b/crates/milli/src/constants.rs new file mode 100644 index 000000000..3dd787f1c --- /dev/null +++ b/crates/milli/src/constants.rs @@ -0,0 +1,2 @@ +pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors"; +pub const RESERVED_GEO_FIELD_NAME: &str = "_geo"; diff --git a/crates/milli/src/criterion.rs b/crates/milli/src/criterion.rs index 45cbfe63d..a1487fa79 100644 --- a/crates/milli/src/criterion.rs +++ b/crates/milli/src/criterion.rs @@ -113,6 +113,7 @@ mod tests { use CriterionError::*; use super::*; + use crate::constants::RESERVED_GEO_FIELD_NAME; #[test] fn parse_criterion() { @@ -153,7 +154,7 @@ mod tests { ("price:aasc", InvalidName { name: S("price:aasc") }), ("price:asc and desc", InvalidName { name: S("price:asc and desc") }), ("price:asc:truc", InvalidName { name: S("price:asc:truc") }), - ("_geo:asc", ReservedName { name: S("_geo") }), + ("_geo:asc", ReservedName { name: S(RESERVED_GEO_FIELD_NAME) }), ("_geoDistance:asc", ReservedName { name: S("_geoDistance") }), ("_geoPoint:asc", ReservedNameForSort { name: S("_geoPoint") }), ("_geoPoint(42, 75):asc", ReservedNameForSort { name: S("_geoPoint") }), diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index 2bd57bba5..f5f784ee0 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -10,12 +10,14 @@ use rhai::EvalAltResult; use serde_json::Value; use thiserror::Error; +use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::documents::{self, DocumentsBatchCursorError}; use crate::thread_pool_no_abort::PanicCatched; use crate::{CriterionError, DocumentId, FieldId, Object, SortError}; pub fn is_reserved_keyword(keyword: &str) -> bool { - ["_geo", "_geoDistance", "_geoPoint", "_geoRadius", "_geoBoundingBox"].contains(&keyword) + [RESERVED_GEO_FIELD_NAME, "_geoDistance", "_geoPoint", "_geoRadius", "_geoBoundingBox"] + .contains(&keyword) } #[derive(Error, Debug)] diff --git a/crates/milli/src/fieldids_weights_map.rs b/crates/milli/src/fieldids_weights_map.rs index 13f2f8afc..194e4649c 100644 --- a/crates/milli/src/fieldids_weights_map.rs +++ b/crates/milli/src/fieldids_weights_map.rs @@ -4,8 +4,7 @@ use std::collections::HashMap; use serde::{Deserialize, Serialize}; -use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; -use crate::{FieldId, FieldsIdsMap, Weight}; +use crate::{constants::RESERVED_VECTORS_FIELD_NAME, FieldId, FieldsIdsMap, Weight}; #[derive(Debug, Default, Serialize, Deserialize)] pub struct FieldidsWeightsMap { diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index f60b59c72..bd832667b 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -10,6 +10,7 @@ use roaring::RoaringBitmap; use rstar::RTree; use serde::{Deserialize, Serialize}; +use crate::constants::RESERVED_VECTORS_FIELD_NAME; use crate::documents::PrimaryKey; use crate::error::{InternalError, UserError}; use crate::fields_ids_map::FieldsIdsMap; @@ -20,7 +21,7 @@ use crate::heed_codec::facet::{ use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec}; use crate::order_by_map::OrderByMap; use crate::proximity::ProximityPrecision; -use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; + use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig}; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, @@ -1732,6 +1733,7 @@ pub(crate) mod tests { use memmap2::Mmap; use tempfile::TempDir; + use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::error::{Error, InternalError}; use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; use crate::progress::Progress; @@ -2173,16 +2175,16 @@ pub(crate) mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("_geo") }); + settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) }); }) .unwrap(); index .add_documents(documents!([ - { "id": 0, "_geo": { "lat": "0", "lng": "0" } }, - { "id": 1, "_geo": { "lat": 0, "lng": "-175" } }, - { "id": 2, "_geo": { "lat": "0", "lng": 175 } }, - { "id": 3, "_geo": { "lat": 85, "lng": 0 } }, - { "id": 4, "_geo": { "lat": "-85", "lng": "0" } }, + { "id": 0, RESERVED_GEO_FIELD_NAME: { "lat": "0", "lng": "0" } }, + { "id": 1, RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": "-175" } }, + { "id": 2, RESERVED_GEO_FIELD_NAME: { "lat": "0", "lng": 175 } }, + { "id": 3, RESERVED_GEO_FIELD_NAME: { "lat": 85, "lng": 0 } }, + { "id": 4, RESERVED_GEO_FIELD_NAME: { "lat": "-85", "lng": "0" } }, ])) .unwrap(); @@ -2859,19 +2861,24 @@ pub(crate) mod tests { index .update_settings(|settings| { settings.set_primary_key("id".to_string()); - settings.set_filterable_fields(HashSet::from(["_geo".to_string()])); + settings + .set_filterable_fields(HashSet::from([RESERVED_GEO_FIELD_NAME.to_string()])); }) .unwrap(); // happy path - index.add_documents(documents!({ "id" : 5, "_geo": {"lat": 12.0, "lng": 11.0}})).unwrap(); + index + .add_documents( + documents!({ "id" : 5, RESERVED_GEO_FIELD_NAME: {"lat": 12.0, "lng": 11.0}}), + ) + .unwrap(); db_snap!(index, geo_faceted_documents_ids); // both are unparseable, we expect GeoError::BadLatitudeAndLongitude let err1 = index .add_documents( - documents!({ "id" : 6, "_geo": {"lat": "unparseable", "lng": "unparseable"}}), + documents!({ "id" : 6, RESERVED_GEO_FIELD_NAME: {"lat": "unparseable", "lng": "unparseable"}}), ) .unwrap_err(); assert!(matches!( @@ -2889,13 +2896,14 @@ pub(crate) mod tests { index .update_settings(|settings| { settings.set_primary_key("id".to_string()); - settings.set_filterable_fields(HashSet::from(["_geo".to_string()])); + settings + .set_filterable_fields(HashSet::from([RESERVED_GEO_FIELD_NAME.to_string()])); }) .unwrap(); let err = index .add_documents( - documents!({ "id" : "doggo", "_geo": { "lat": 1, "lng": 2, "doggo": "are the best" }}), + documents!({ "id" : "doggo", RESERVED_GEO_FIELD_NAME: { "lat": 1, "lng": 2, "doggo": "are the best" }}), ) .unwrap_err(); insta::assert_snapshot!(err, @r###"The `_geo` field in the document with the id: `"doggo"` contains the following unexpected fields: `{"doggo":"are the best"}`."###); @@ -2905,7 +2913,7 @@ pub(crate) mod tests { // multiple fields and complex values let err = index .add_documents( - documents!({ "id" : "doggo", "_geo": { "lat": 1, "lng": 2, "doggo": "are the best", "and": { "all": ["cats", { "are": "beautiful" } ] } } }), + documents!({ "id" : "doggo", RESERVED_GEO_FIELD_NAME: { "lat": 1, "lng": 2, "doggo": "are the best", "and": { "all": ["cats", { "are": "beautiful" } ] } } }), ) .unwrap_err(); insta::assert_snapshot!(err, @r###"The `_geo` field in the document with the id: `"doggo"` contains the following unexpected fields: `{"and":{"all":["cats",{"are":"beautiful"}]},"doggo":"are the best"}`."###); diff --git a/crates/milli/src/lib.rs b/crates/milli/src/lib.rs index 3ae0bfdb9..db44f745f 100644 --- a/crates/milli/src/lib.rs +++ b/crates/milli/src/lib.rs @@ -30,6 +30,7 @@ pub mod vector; #[cfg(test)] #[macro_use] pub mod snapshot_tests; +pub mod constants; mod fieldids_weights_map; pub mod progress; diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs index aa3849ffc..76f9ed6ff 100644 --- a/crates/milli/src/search/facet/filter.rs +++ b/crates/milli/src/search/facet/filter.rs @@ -10,6 +10,7 @@ use roaring::{MultiOps, RoaringBitmap}; use serde_json::Value; use super::facet_range_search; +use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::error::{Error, UserError}; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec, @@ -501,7 +502,7 @@ impl<'a> Filter<'a> { } } FilterCondition::GeoLowerThan { point, radius } => { - if filterable_fields.contains("_geo") { + if filterable_fields.contains(RESERVED_GEO_FIELD_NAME) { let base_point: [f64; 2] = [point[0].parse_finite_float()?, point[1].parse_finite_float()?]; if !(-90.0..=90.0).contains(&base_point[0]) { @@ -530,13 +531,13 @@ impl<'a> Filter<'a> { Ok(result) } else { Err(point[0].as_external_error(FilterError::AttributeNotFilterable { - attribute: "_geo", + attribute: RESERVED_GEO_FIELD_NAME, filterable_fields: filterable_fields.clone(), }))? } } FilterCondition::GeoBoundingBox { top_right_point, bottom_left_point } => { - if filterable_fields.contains("_geo") { + if filterable_fields.contains(RESERVED_GEO_FIELD_NAME) { let top_right: [f64; 2] = [ top_right_point[0].parse_finite_float()?, top_right_point[1].parse_finite_float()?, @@ -663,7 +664,7 @@ impl<'a> Filter<'a> { } else { Err(top_right_point[0].as_external_error( FilterError::AttributeNotFilterable { - attribute: "_geo", + attribute: RESERVED_GEO_FIELD_NAME, filterable_fields: filterable_fields.clone(), }, ))? @@ -689,6 +690,7 @@ mod tests { use maplit::hashset; use roaring::RoaringBitmap; + use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::index::tests::TempIndex; use crate::Filter; @@ -899,7 +901,7 @@ mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("_geo") }); + settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) }); }) .unwrap(); @@ -911,7 +913,7 @@ mod tests { "address": "Viale Vittorio Veneto, 30, 20124, Milan, Italy", "type": "pizza", "rating": 9, - "_geo": { + RESERVED_GEO_FIELD_NAME: { "lat": 45.4777599, "lng": 9.1967508 } @@ -922,7 +924,7 @@ mod tests { "address": "Via Dogana, 1, 20123 Milan, Italy", "type": "ice cream", "rating": 10, - "_geo": { + RESERVED_GEO_FIELD_NAME: { "lat": 45.4632046, "lng": 9.1719421 } @@ -945,8 +947,8 @@ mod tests { index .update_settings(|settings| { - settings.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order - settings.set_filterable_fields(hashset! { S("_geo"), S("price") }); + settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order + settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("price") }); }) .unwrap(); @@ -995,8 +997,8 @@ mod tests { index .update_settings(|settings| { - settings.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order - settings.set_filterable_fields(hashset! { S("_geo"), S("price") }); + settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order + settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("price") }); }) .unwrap(); diff --git a/crates/milli/src/search/new/mod.rs b/crates/milli/src/search/new/mod.rs index 4edcd09de..d47f88830 100644 --- a/crates/milli/src/search/new/mod.rs +++ b/crates/milli/src/search/new/mod.rs @@ -50,6 +50,7 @@ use self::graph_based_ranking_rule::Words; use self::interner::Interned; use self::vector_sort::VectorSort; use crate::index::PrefixSearch; +use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::localized_attributes_rules::LocalizedFieldIds; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::search::new::distinct::apply_distinct_rule; @@ -863,12 +864,12 @@ fn check_sort_criteria( } .into()); } - Member::Geo(_) if !sortable_fields.contains("_geo") => { + Member::Geo(_) if !sortable_fields.contains(RESERVED_GEO_FIELD_NAME) => { let (valid_fields, hidden_fields) = ctx.index.remove_hidden_fields(ctx.txn, sortable_fields)?; return Err(UserError::InvalidSortableAttribute { - field: "_geo".to_string(), + field: RESERVED_GEO_FIELD_NAME.to_string(), valid_fields, hidden_fields, } diff --git a/crates/milli/src/search/new/tests/geo_sort.rs b/crates/milli/src/search/new/tests/geo_sort.rs index 0d65b589a..2eda39ba1 100644 --- a/crates/milli/src/search/new/tests/geo_sort.rs +++ b/crates/milli/src/search/new/tests/geo_sort.rs @@ -6,6 +6,7 @@ use big_s::S; use heed::RoTxn; use maplit::hashset; +use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::index::tests::TempIndex; use crate::score_details::ScoreDetails; use crate::search::new::tests::collect_field_values; @@ -17,7 +18,7 @@ fn create_index() -> TempIndex { index .update_settings(|s| { s.set_primary_key("id".to_owned()); - s.set_sortable_fields(hashset! { S("_geo") }); + s.set_sortable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) }); s.set_criteria(vec![Criterion::Words, Criterion::Sort]); }) .unwrap(); @@ -68,12 +69,12 @@ fn test_geo_sort() { index .add_documents(documents!([ - { "id": 2, "_geo": { "lat": 2, "lng": -1 } }, - { "id": 3, "_geo": { "lat": -2, "lng": -2 } }, - { "id": 5, "_geo": { "lat": 6, "lng": -5 } }, - { "id": 4, "_geo": { "lat": 3, "lng": 5 } }, - { "id": 0, "_geo": { "lat": 0, "lng": 0 } }, - { "id": 1, "_geo": { "lat": 1, "lng": 1 } }, + { "id": 2, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": -1 } }, + { "id": 3, RESERVED_GEO_FIELD_NAME: { "lat": -2, "lng": -2 } }, + { "id": 5, RESERVED_GEO_FIELD_NAME: { "lat": 6, "lng": -5 } }, + { "id": 4, RESERVED_GEO_FIELD_NAME: { "lat": 3, "lng": 5 } }, + { "id": 0, RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 0 } }, + { "id": 1, RESERVED_GEO_FIELD_NAME: { "lat": 1, "lng": 1 } }, { "id": 6 }, { "id": 8 }, { "id": 7 }, { "id": 10 }, { "id": 9 }, ])) .unwrap(); @@ -100,12 +101,12 @@ fn test_geo_sort_around_the_edge_of_the_flat_earth() { index .add_documents(documents!([ - { "id": 0, "_geo": { "lat": 0, "lng": 0 } }, - { "id": 1, "_geo": { "lat": 88, "lng": 0 } }, - { "id": 2, "_geo": { "lat": -89, "lng": 0 } }, + { "id": 0, RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 0 } }, + { "id": 1, RESERVED_GEO_FIELD_NAME: { "lat": 88, "lng": 0 } }, + { "id": 2, RESERVED_GEO_FIELD_NAME: { "lat": -89, "lng": 0 } }, - { "id": 3, "_geo": { "lat": 0, "lng": 178 } }, - { "id": 4, "_geo": { "lat": 0, "lng": -179 } }, + { "id": 3, RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 178 } }, + { "id": 4, RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": -179 } }, ])) .unwrap(); @@ -177,11 +178,11 @@ fn geo_sort_mixed_with_words() { index .add_documents(documents!([ - { "id": 0, "doggo": "jean", "_geo": { "lat": 0, "lng": 0 } }, - { "id": 1, "doggo": "intel", "_geo": { "lat": 88, "lng": 0 } }, - { "id": 2, "doggo": "jean bob", "_geo": { "lat": -89, "lng": 0 } }, - { "id": 3, "doggo": "jean michel", "_geo": { "lat": 0, "lng": 178 } }, - { "id": 4, "doggo": "bob marley", "_geo": { "lat": 0, "lng": -179 } }, + { "id": 0, "doggo": "jean", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 0 } }, + { "id": 1, "doggo": "intel", RESERVED_GEO_FIELD_NAME: { "lat": 88, "lng": 0 } }, + { "id": 2, "doggo": "jean bob", RESERVED_GEO_FIELD_NAME: { "lat": -89, "lng": 0 } }, + { "id": 3, "doggo": "jean michel", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 178 } }, + { "id": 4, "doggo": "bob marley", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": -179 } }, ])) .unwrap(); diff --git a/crates/milli/src/search/new/tests/integration.rs b/crates/milli/src/search/new/tests/integration.rs index 04d3b6667..fc15b5f12 100644 --- a/crates/milli/src/search/new/tests/integration.rs +++ b/crates/milli/src/search/new/tests/integration.rs @@ -11,6 +11,7 @@ use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings}; use crate::vector::EmbeddingConfigs; use crate::{db_snap, Criterion, Index}; pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson"); +use crate::constants::RESERVED_GEO_FIELD_NAME; pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let path = tempfile::tempdir().unwrap(); @@ -27,7 +28,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { builder.set_filterable_fields(hashset! { S("tag"), S("asc_desc_rank"), - S("_geo"), + S(RESERVED_GEO_FIELD_NAME), S("opt1"), S("opt1.opt2"), S("tag_in") diff --git a/crates/milli/src/update/clear_documents.rs b/crates/milli/src/update/clear_documents.rs index 6c4efb859..b0ae070de 100644 --- a/crates/milli/src/update/clear_documents.rs +++ b/crates/milli/src/update/clear_documents.rs @@ -103,6 +103,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> { #[cfg(test)] mod tests { use super::*; + use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::index::tests::TempIndex; #[test] @@ -114,7 +115,7 @@ mod tests { .add_documents_using_wtxn(&mut wtxn, documents!([ { "id": 0, "name": "kevin", "age": 20 }, { "id": 1, "name": "kevina" }, - { "id": 2, "name": "benoit", "country": "France", "_geo": { "lng": 42, "lat": 35 } } + { "id": 2, "name": "benoit", "country": "France", RESERVED_GEO_FIELD_NAME: { "lng": 42, "lat": 35 } } ])) .unwrap(); diff --git a/crates/milli/src/update/index_documents/enrich.rs b/crates/milli/src/update/index_documents/enrich.rs index 85f871830..c35701961 100644 --- a/crates/milli/src/update/index_documents/enrich.rs +++ b/crates/milli/src/update/index_documents/enrich.rs @@ -5,6 +5,7 @@ use std::result::Result as StdResult; use serde::{Deserialize, Serialize}; use serde_json::Value; +use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::documents::{ DocumentIdExtractionError, DocumentsBatchIndex, DocumentsBatchReader, EnrichedDocumentsBatchReader, PrimaryKey, DEFAULT_PRIMARY_KEY, @@ -93,10 +94,10 @@ pub fn enrich_documents_batch( // If the settings specifies that a _geo field must be used therefore we must check the // validity of it in all the documents of this batch and this is when we return `Some`. - let geo_field_id = match documents_batch_index.id("_geo") { + let geo_field_id = match documents_batch_index.id(RESERVED_GEO_FIELD_NAME) { Some(geo_field_id) - if index.sortable_fields(rtxn)?.contains("_geo") - || index.filterable_fields(rtxn)?.contains("_geo") => + if index.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME) + || index.filterable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME) => { Some(geo_field_id) } diff --git a/crates/milli/src/update/index_documents/extract/extract_vector_points.rs b/crates/milli/src/update/index_documents/extract/extract_vector_points.rs index 7b5bf3f40..9103e8324 100644 --- a/crates/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/crates/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -13,13 +13,14 @@ use roaring::RoaringBitmap; use serde_json::Value; use super::helpers::{create_writer, writer_into_reader, GrenadParameters}; +use crate::constants::RESERVED_VECTORS_FIELD_NAME; use crate::error::FaultSource; use crate::index::IndexEmbeddingConfig; use crate::prompt::{FieldsIdsMapWithMetadata, Prompt}; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::settings::InnerIndexSettingsDiff; use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution}; -use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME}; +use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState}; use crate::vector::settings::ReindexAction; use crate::vector::{Embedder, Embedding}; use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort}; diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index bae8e00b4..11ee6916b 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -27,6 +27,7 @@ pub use self::enrich::{extract_finite_float_from_value, DocumentId}; pub use self::helpers::*; pub use self::transform::{Transform, TransformOutput}; use super::new::StdResult; + use crate::documents::{obkv_to_object, DocumentsBatchReader}; use crate::error::{Error, InternalError}; use crate::index::{PrefixSearch, PrefixSettings}; @@ -763,6 +764,7 @@ mod tests { use maplit::hashset; use super::*; + use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::documents::mmap_from_objects; use crate::index::tests::TempIndex; use crate::index::IndexEmbeddingConfig; @@ -944,12 +946,12 @@ mod tests { index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments; index.add_documents(documents!([ - { "id": 2, "title": "Pride and Prejudice", "author": "Jane Austin", "genre": "romance", "price": 3.5, "_geo": { "lat": 12, "lng": 42 } }, + { "id": 2, "title": "Pride and Prejudice", "author": "Jane Austin", "genre": "romance", "price": 3.5, RESERVED_GEO_FIELD_NAME: { "lat": 12, "lng": 42 } }, { "id": 456, "title": "Le Petit Prince", "author": "Antoine de Saint-Exupéry", "genre": "adventure" , "price": 10.0 }, { "id": 1, "title": "Alice In Wonderland", "author": "Lewis Carroll", "genre": "fantasy", "price": 25.99 }, { "id": 1344, "title": "The Hobbit", "author": "J. R. R. Tolkien", "genre": "fantasy" }, { "id": 4, "title": "Harry Potter and the Half-Blood Prince", "author": "J. K. Rowling", "genre": "fantasy" }, - { "id": 42, "title": "The Hitchhiker's Guide to the Galaxy", "author": "Douglas Adams", "_geo": { "lat": 35, "lng": 23 } } + { "id": 42, "title": "The Hitchhiker's Guide to the Galaxy", "author": "Douglas Adams", RESERVED_GEO_FIELD_NAME: { "lat": 35, "lng": 23 } } ])).unwrap(); db_snap!(index, word_docids, "initial"); @@ -989,18 +991,18 @@ mod tests { // We send 6 documents and mix the ones that have _geo and those that don't have it. index .add_documents(documents!([ - { "id": 2, "price": 3.5, "_geo": { "lat": 12, "lng": 42 } }, + { "id": 2, "price": 3.5, RESERVED_GEO_FIELD_NAME: { "lat": 12, "lng": 42 } }, { "id": 456 }, { "id": 1 }, { "id": 1344 }, { "id": 4 }, - { "id": 42, "_geo": { "lat": 35, "lng": 23 } } + { "id": 42, RESERVED_GEO_FIELD_NAME: { "lat": 35, "lng": 23 } } ])) .unwrap(); index .update_settings(|settings| { - settings.set_filterable_fields(hashset!(S("_geo"))); + settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME))); }) .unwrap(); } @@ -1012,13 +1014,13 @@ mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(hashset!(S("_geo"))); + settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME))); }) .unwrap(); let error = index .add_documents(documents!([ - { "id": 0, "_geo": { "lng": 42 } } + { "id": 0, RESERVED_GEO_FIELD_NAME: { "lng": 42 } } ])) .unwrap_err(); assert_eq!( @@ -1028,7 +1030,7 @@ mod tests { let error = index .add_documents(documents!([ - { "id": 0, "_geo": { "lat": 42 } } + { "id": 0, RESERVED_GEO_FIELD_NAME: { "lat": 42 } } ])) .unwrap_err(); assert_eq!( @@ -1038,7 +1040,7 @@ mod tests { let error = index .add_documents(documents!([ - { "id": 0, "_geo": { "lat": "lol", "lng": 42 } } + { "id": 0, RESERVED_GEO_FIELD_NAME: { "lat": "lol", "lng": 42 } } ])) .unwrap_err(); assert_eq!( @@ -1048,7 +1050,7 @@ mod tests { let error = index .add_documents(documents!([ - { "id": 0, "_geo": { "lat": [12, 13], "lng": 42 } } + { "id": 0, RESERVED_GEO_FIELD_NAME: { "lat": [12, 13], "lng": 42 } } ])) .unwrap_err(); assert_eq!( @@ -1058,7 +1060,7 @@ mod tests { let error = index .add_documents(documents!([ - { "id": 0, "_geo": { "lat": 12, "lng": "hello" } } + { "id": 0, RESERVED_GEO_FIELD_NAME: { "lat": 12, "lng": "hello" } } ])) .unwrap_err(); assert_eq!( @@ -1076,7 +1078,7 @@ mod tests { { "objectId": 123, "title": "Pride and Prejudice", "comment": "A great book" }, { "objectId": 456, "title": "Le Petit Prince", "comment": "A french book" }, { "objectId": 1, "title": "Alice In Wonderland", "comment": "A weird book" }, - { "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } } + { "objectId": 30, "title": "Hamlet", RESERVED_GEO_FIELD_NAME: { "lat": 12, "lng": 89 } } ])) .unwrap(); @@ -1091,7 +1093,7 @@ mod tests { index .add_documents(documents!([ - { "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } } + { "objectId": 30, "title": "Hamlet", RESERVED_GEO_FIELD_NAME: { "lat": 12, "lng": 89 } } ])) .unwrap(); @@ -1102,7 +1104,7 @@ mod tests { index .add_documents(documents!([ - { "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } } + { "objectId": 30, "title": "Hamlet", RESERVED_GEO_FIELD_NAME: { "lat": 12, "lng": 89 } } ])) .unwrap(); } @@ -3146,34 +3148,34 @@ mod tests { index .update_settings_using_wtxn(&mut wtxn, |settings| { settings.set_primary_key(S("id")); - settings.set_filterable_fields(hashset!(S("_geo"))); - settings.set_sortable_fields(hashset!(S("_geo"))); + settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME))); + settings.set_sortable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME))); }) .unwrap(); wtxn.commit().unwrap(); let mut wtxn = index.write_txn().unwrap(); index.add_documents_using_wtxn(&mut wtxn, documents!([ - { "id": "1", "city": "Lille", "_geo": { "lat": 50.6299, "lng": 3.0569 } }, - { "id": "2", "city": "Mons-en-Barœul", "_geo": { "lat": 50.6415, "lng": 3.1106 } }, - { "id": "3", "city": "Hellemmes", "_geo": { "lat": 50.6312, "lng": 3.1106 } }, - { "id": "4", "city": "Villeneuve-d'Ascq", "_geo": { "lat": 50.6224, "lng": 3.1476 } }, - { "id": "5", "city": "Hem", "_geo": { "lat": 50.6552, "lng": 3.1897 } }, - { "id": "6", "city": "Roubaix", "_geo": { "lat": 50.6924, "lng": 3.1763 } }, - { "id": "7", "city": "Tourcoing", "_geo": { "lat": 50.7263, "lng": 3.1541 } }, - { "id": "8", "city": "Mouscron", "_geo": { "lat": 50.7453, "lng": 3.2206 } }, - { "id": "9", "city": "Tournai", "_geo": { "lat": 50.6053, "lng": 3.3758 } }, - { "id": "10", "city": "Ghent", "_geo": { "lat": 51.0537, "lng": 3.6957 } }, - { "id": "11", "city": "Brussels", "_geo": { "lat": 50.8466, "lng": 4.3370 } }, - { "id": "12", "city": "Charleroi", "_geo": { "lat": 50.4095, "lng": 4.4347 } }, - { "id": "13", "city": "Mons", "_geo": { "lat": 50.4502, "lng": 3.9623 } }, - { "id": "14", "city": "Valenciennes", "_geo": { "lat": 50.3518, "lng": 3.5326 } }, - { "id": "15", "city": "Arras", "_geo": { "lat": 50.2844, "lng": 2.7637 } }, - { "id": "16", "city": "Cambrai", "_geo": { "lat": 50.1793, "lng": 3.2189 } }, - { "id": "17", "city": "Bapaume", "_geo": { "lat": 50.1112, "lng": 2.8547 } }, - { "id": "18", "city": "Amiens", "_geo": { "lat": 49.9314, "lng": 2.2710 } }, - { "id": "19", "city": "Compiègne", "_geo": { "lat": 49.4449, "lng": 2.7913 } }, - { "id": "20", "city": "Paris", "_geo": { "lat": 48.9021, "lng": 2.3708 } } + { "id": "1", "city": "Lille", RESERVED_GEO_FIELD_NAME: { "lat": 50.6299, "lng": 3.0569 } }, + { "id": "2", "city": "Mons-en-Barœul", RESERVED_GEO_FIELD_NAME: { "lat": 50.6415, "lng": 3.1106 } }, + { "id": "3", "city": "Hellemmes", RESERVED_GEO_FIELD_NAME: { "lat": 50.6312, "lng": 3.1106 } }, + { "id": "4", "city": "Villeneuve-d'Ascq", RESERVED_GEO_FIELD_NAME: { "lat": 50.6224, "lng": 3.1476 } }, + { "id": "5", "city": "Hem", RESERVED_GEO_FIELD_NAME: { "lat": 50.6552, "lng": 3.1897 } }, + { "id": "6", "city": "Roubaix", RESERVED_GEO_FIELD_NAME: { "lat": 50.6924, "lng": 3.1763 } }, + { "id": "7", "city": "Tourcoing", RESERVED_GEO_FIELD_NAME: { "lat": 50.7263, "lng": 3.1541 } }, + { "id": "8", "city": "Mouscron", RESERVED_GEO_FIELD_NAME: { "lat": 50.7453, "lng": 3.2206 } }, + { "id": "9", "city": "Tournai", RESERVED_GEO_FIELD_NAME: { "lat": 50.6053, "lng": 3.3758 } }, + { "id": "10", "city": "Ghent", RESERVED_GEO_FIELD_NAME: { "lat": 51.0537, "lng": 3.6957 } }, + { "id": "11", "city": "Brussels", RESERVED_GEO_FIELD_NAME: { "lat": 50.8466, "lng": 4.3370 } }, + { "id": "12", "city": "Charleroi", RESERVED_GEO_FIELD_NAME: { "lat": 50.4095, "lng": 4.4347 } }, + { "id": "13", "city": "Mons", RESERVED_GEO_FIELD_NAME: { "lat": 50.4502, "lng": 3.9623 } }, + { "id": "14", "city": "Valenciennes", RESERVED_GEO_FIELD_NAME: { "lat": 50.3518, "lng": 3.5326 } }, + { "id": "15", "city": "Arras", RESERVED_GEO_FIELD_NAME: { "lat": 50.2844, "lng": 2.7637 } }, + { "id": "16", "city": "Cambrai", RESERVED_GEO_FIELD_NAME: { "lat": 50.1793, "lng": 3.2189 } }, + { "id": "17", "city": "Bapaume", RESERVED_GEO_FIELD_NAME: { "lat": 50.1112, "lng": 2.8547 } }, + { "id": "18", "city": "Amiens", RESERVED_GEO_FIELD_NAME: { "lat": 49.9314, "lng": 2.2710 } }, + { "id": "19", "city": "Compiègne", RESERVED_GEO_FIELD_NAME: { "lat": 49.4449, "lng": 2.7913 } }, + { "id": "20", "city": "Paris", RESERVED_GEO_FIELD_NAME: { "lat": 48.9021, "lng": 2.3708 } } ])).unwrap(); wtxn.commit().unwrap(); diff --git a/crates/milli/src/update/index_documents/transform.rs b/crates/milli/src/update/index_documents/transform.rs index 7477b5667..d87524a34 100644 --- a/crates/milli/src/update/index_documents/transform.rs +++ b/crates/milli/src/update/index_documents/transform.rs @@ -836,10 +836,8 @@ impl<'a, 'i> Transform<'a, 'i> { }) .collect(); - let old_vectors_fid = settings_diff - .old - .fields_ids_map - .id(crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME); + let old_vectors_fid = + settings_diff.old.fields_ids_map.id(crate::constants::RESERVED_VECTORS_FIELD_NAME); // We initialize the sorter with the user indexing settings. let mut flattened_sorter = diff --git a/crates/milli/src/update/index_documents/typed_chunk.rs b/crates/milli/src/update/index_documents/typed_chunk.rs index a97569800..d5c250e2d 100644 --- a/crates/milli/src/update/index_documents/typed_chunk.rs +++ b/crates/milli/src/update/index_documents/typed_chunk.rs @@ -137,8 +137,7 @@ pub(crate) fn write_typed_chunk_into_index( let _entered = span.enter(); let fields_ids_map = index.fields_ids_map(wtxn)?; - let vectors_fid = - fields_ids_map.id(crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME); + let vectors_fid = fields_ids_map.id(crate::constants::RESERVED_VECTORS_FIELD_NAME); let mut builder = MergerBuilder::new(KeepLatestObkv); for typed_chunk in typed_chunks { diff --git a/crates/milli/src/update/new/document.rs b/crates/milli/src/update/new/document.rs index 930b0c078..ffcf93312 100644 --- a/crates/milli/src/update/new/document.rs +++ b/crates/milli/src/update/new/document.rs @@ -7,8 +7,8 @@ use serde_json::value::RawValue; use super::vector_document::VectorDocument; use super::{KvReaderFieldId, KvWriterFieldId}; +use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME}; use crate::documents::FieldIdMapper; -use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; use crate::{DocumentId, GlobalFieldsIdsMap, Index, InternalError, Result, UserError}; /// A view into a document that can represent either the current version from the DB, @@ -80,7 +80,7 @@ impl<'t, Mapper: FieldIdMapper> Document<'t> for DocumentFromDb<'t, Mapper> { Err(error) => return Some(Err(error.into())), }; - if name == RESERVED_VECTORS_FIELD_NAME || name == "_geo" { + if name == RESERVED_VECTORS_FIELD_NAME || name == RESERVED_GEO_FIELD_NAME { continue; } @@ -100,7 +100,7 @@ impl<'t, Mapper: FieldIdMapper> Document<'t> for DocumentFromDb<'t, Mapper> { } fn geo_field(&self) -> Result> { - self.field("_geo") + self.field(RESERVED_GEO_FIELD_NAME) } fn top_level_fields_count(&self) -> usize { @@ -115,7 +115,7 @@ impl<'t, Mapper: FieldIdMapper> Document<'t> for DocumentFromDb<'t, Mapper> { } fn top_level_field(&self, k: &str) -> Result> { - if k == RESERVED_VECTORS_FIELD_NAME || k == "_geo" { + if k == RESERVED_VECTORS_FIELD_NAME || k == RESERVED_GEO_FIELD_NAME { return Ok(None); } self.field(k) @@ -367,7 +367,9 @@ where } if let Some(geo_value) = document.geo_field()? { - let fid = fields_ids_map.id_or_insert("_geo").ok_or(UserError::AttributeLimitReached)?; + let fid = fields_ids_map + .id_or_insert(RESERVED_GEO_FIELD_NAME) + .ok_or(UserError::AttributeLimitReached)?; fields_ids_map.id_or_insert("_geo.lat").ok_or(UserError::AttributeLimitReached)?; fields_ids_map.id_or_insert("_geo.lng").ok_or(UserError::AttributeLimitReached)?; unordered_field_buffer.push((fid, geo_value)); @@ -409,7 +411,9 @@ impl<'doc> Versions<'doc> { } pub fn iter_top_level_fields(&self) -> impl Iterator + '_ { - self.data.iter().filter(|(k, _)| *k != RESERVED_VECTORS_FIELD_NAME && *k != "_geo") + self.data + .iter() + .filter(|(k, _)| *k != RESERVED_VECTORS_FIELD_NAME && *k != RESERVED_GEO_FIELD_NAME) } pub fn vectors_field(&self) -> Option<&'doc RawValue> { @@ -417,7 +421,7 @@ impl<'doc> Versions<'doc> { } pub fn geo_field(&self) -> Option<&'doc RawValue> { - self.data.get("_geo") + self.data.get(RESERVED_GEO_FIELD_NAME) } pub fn len(&self) -> usize { @@ -429,7 +433,7 @@ impl<'doc> Versions<'doc> { } pub fn top_level_field(&self, k: &str) -> Option<&'doc RawValue> { - if k == RESERVED_VECTORS_FIELD_NAME || k == "_geo" { + if k == RESERVED_VECTORS_FIELD_NAME || k == RESERVED_GEO_FIELD_NAME { return None; } self.data.get(k) diff --git a/crates/milli/src/update/new/extract/documents.rs b/crates/milli/src/update/new/extract/documents.rs index 13307025a..832e8c463 100644 --- a/crates/milli/src/update/new/extract/documents.rs +++ b/crates/milli/src/update/new/extract/documents.rs @@ -4,6 +4,7 @@ use bumpalo::Bump; use hashbrown::HashMap; use super::DelAddRoaringBitmap; +use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::update::new::channel::DocumentsSender; use crate::update::new::document::{write_to_obkv, Document as _}; use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extractor}; @@ -62,8 +63,10 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a, 'b> { context.index, &context.db_fields_ids_map, )?; - let geo_iter = - content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv))); + let geo_iter = content + .geo_field() + .transpose() + .map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv))); for res in content.iter_top_level_fields().chain(geo_iter) { let (f, _) = res?; let entry = document_extractor_data @@ -79,8 +82,10 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a, 'b> { let docid = update.docid(); let content = update.current(&context.rtxn, context.index, &context.db_fields_ids_map)?; - let geo_iter = - content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv))); + let geo_iter = content + .geo_field() + .transpose() + .map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv))); for res in content.iter_top_level_fields().chain(geo_iter) { let (f, _) = res?; let entry = document_extractor_data @@ -90,8 +95,10 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a, 'b> { *entry -= 1; } let content = update.updated(); - let geo_iter = - content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv))); + let geo_iter = content + .geo_field() + .transpose() + .map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv))); for res in content.iter_top_level_fields().chain(geo_iter) { let (f, _) = res?; let entry = document_extractor_data @@ -121,8 +128,10 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a, 'b> { DocumentChange::Insertion(insertion) => { let docid = insertion.docid(); let content = insertion.inserted(); - let geo_iter = - content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv))); + let geo_iter = content + .geo_field() + .transpose() + .map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv))); for res in content.iter_top_level_fields().chain(geo_iter) { let (f, _) = res?; let entry = document_extractor_data diff --git a/crates/milli/src/update/new/extract/faceted/facet_document.rs b/crates/milli/src/update/new/extract/faceted/facet_document.rs index eff529120..8d582d103 100644 --- a/crates/milli/src/update/new/extract/faceted/facet_document.rs +++ b/crates/milli/src/update/new/extract/faceted/facet_document.rs @@ -1,5 +1,6 @@ use serde_json::Value; +use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::update::new::document::Document; use crate::update::new::extract::geo::extract_geo_coordinates; use crate::update::new::extract::perm_json_p; @@ -69,7 +70,7 @@ pub fn extract_document_facets<'doc>( } } - if attributes_to_extract.contains(&"_geo") { + if attributes_to_extract.contains(&RESERVED_GEO_FIELD_NAME) { if let Some(geo_value) = document.geo_field()? { if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? { let (lat_fid, lng_fid) = field_id_map diff --git a/crates/milli/src/update/new/extract/geo/mod.rs b/crates/milli/src/update/new/extract/geo/mod.rs index a3820609d..42da7766e 100644 --- a/crates/milli/src/update/new/extract/geo/mod.rs +++ b/crates/milli/src/update/new/extract/geo/mod.rs @@ -9,6 +9,7 @@ use heed::RoTxn; use serde_json::value::RawValue; use serde_json::Value; +use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::error::GeoError; use crate::update::new::document::Document; use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extractor}; @@ -28,8 +29,8 @@ impl GeoExtractor { index: &Index, grenad_parameters: GrenadParameters, ) -> Result> { - let is_sortable = index.sortable_fields(rtxn)?.contains("_geo"); - let is_filterable = index.filterable_fields(rtxn)?.contains("_geo"); + let is_sortable = index.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME); + let is_filterable = index.filterable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME); if is_sortable || is_filterable { Ok(Some(GeoExtractor { grenad_parameters })) } else { diff --git a/crates/milli/src/update/new/vector_document.rs b/crates/milli/src/update/new/vector_document.rs index 8d14a749d..a52dab6a1 100644 --- a/crates/milli/src/update/new/vector_document.rs +++ b/crates/milli/src/update/new/vector_document.rs @@ -10,11 +10,10 @@ use serde_json::value::RawValue; use super::document::{Document, DocumentFromDb, DocumentFromVersions, Versions}; use super::indexer::de::DeserrRawValue; +use crate::constants::RESERVED_VECTORS_FIELD_NAME; use crate::documents::FieldIdMapper; use crate::index::IndexEmbeddingConfig; -use crate::vector::parsed_vectors::{ - RawVectors, RawVectorsError, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME, -}; +use crate::vector::parsed_vectors::{RawVectors, RawVectorsError, VectorOrArrayOfVectors}; use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfigs}; use crate::{DocumentId, Index, InternalError, Result, UserError}; diff --git a/crates/milli/src/update/settings.rs b/crates/milli/src/update/settings.rs index 3d2702479..85259c2d0 100644 --- a/crates/milli/src/update/settings.rs +++ b/crates/milli/src/update/settings.rs @@ -14,6 +14,7 @@ use time::OffsetDateTime; use super::del_add::DelAddOperation; use super::index_documents::{IndexDocumentsConfig, Transform}; use super::IndexerConfig; +use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME}; use crate::criterion::Criterion; use crate::error::UserError; use crate::index::{ @@ -25,7 +26,6 @@ use crate::prompt::default_max_bytes; use crate::proximity::ProximityPrecision; use crate::update::index_documents::IndexDocumentsMethod; use crate::update::{IndexDocuments, UpdateIndexingStep}; -use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; use crate::vector::settings::{ check_set, check_unset, EmbedderAction, EmbedderSource, EmbeddingSettings, ReindexAction, WriteBackToDocuments, @@ -1535,7 +1535,7 @@ impl InnerIndexSettings { .filter_map(|(field, count)| (count != 0).then_some(field)) .collect(); // index.fields_ids_map($a)? ==>> fields_ids_map - let geo_fields_ids = match fields_ids_map.id("_geo") { + let geo_fields_ids = match fields_ids_map.id(RESERVED_GEO_FIELD_NAME) { Some(gfid) => { let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid); let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid); diff --git a/crates/milli/src/vector/parsed_vectors.rs b/crates/milli/src/vector/parsed_vectors.rs index da41d1771..5fcb2912b 100644 --- a/crates/milli/src/vector/parsed_vectors.rs +++ b/crates/milli/src/vector/parsed_vectors.rs @@ -10,8 +10,6 @@ use crate::index::IndexEmbeddingConfig; use crate::update::del_add::{DelAdd, KvReaderDelAdd}; use crate::{DocumentId, FieldId, InternalError, UserError}; -pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors"; - #[derive(serde::Serialize, Debug)] #[serde(untagged)] pub enum RawVectors<'doc> {