From e6a7521610fcad26483c5fa20ec23c57982dbb22 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 18 May 2020 18:56:21 +0200 Subject: [PATCH 1/8] Introduce the DiscoverIds and DocumentsIds types --- meilisearch-core/src/store/documents_ids.rs | 75 +++++++++++++++++++++ meilisearch-core/src/store/mod.rs | 7 +- 2 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 meilisearch-core/src/store/documents_ids.rs diff --git a/meilisearch-core/src/store/documents_ids.rs b/meilisearch-core/src/store/documents_ids.rs new file mode 100644 index 000000000..d9d80d33c --- /dev/null +++ b/meilisearch-core/src/store/documents_ids.rs @@ -0,0 +1,75 @@ +use std::borrow::Cow; + +use heed::{BytesDecode, BytesEncode}; +use sdset::Set; + +use crate::DocumentId; +use super::cow_set::CowSet; + +pub struct DocumentsIds; + +impl BytesEncode<'_> for DocumentsIds { + type EItem = Set; + + fn bytes_encode(item: &Self::EItem) -> Option> { + CowSet::bytes_encode(item) + } +} + +impl<'a> BytesDecode<'a> for DocumentsIds { + type DItem = Cow<'a, Set>; + + fn bytes_decode(bytes: &'a [u8]) -> Option { + CowSet::bytes_decode(bytes) + } +} + +pub struct DiscoverIds<'a> { + ids_iter: std::slice::Iter<'a, DocumentId>, + left_id: Option, + right_id: Option, + available_range: std::ops::Range, +} + +impl DiscoverIds<'_> { + pub fn new(ids: &Set) -> DiscoverIds { + let mut ids_iter = ids.iter(); + let right_id = ids_iter.next().map(|id| id.0); + let available_range = 0..right_id.unwrap_or(u64::max_value()); + DiscoverIds { ids_iter, left_id: None, right_id, available_range } + } +} + +impl Iterator for DiscoverIds<'_> { + type Item = DocumentId; + + fn next(&mut self) -> Option { + loop { + match self.available_range.next() { + // The available range gives us a new id, we return it. + Some(id) => return Some(DocumentId(id)), + // The available range is exhausted, we need to find the next one. + None if self.available_range.end == u64::max_value() => return None, + None => loop { + self.left_id = self.right_id.take(); + self.right_id = self.ids_iter.next().map(|id| id.0); + match (self.left_id, self.right_id) { + // We found a gap in the used ids, we can yield all ids + // until the end of the gap + (Some(l), Some(r)) => if l.saturating_add(1) != r { + self.available_range = (l + 1)..r; + break; + }, + // The last used id has been reached, we can use all ids + // until u64 MAX + (Some(l), None) => { + self.available_range = l.saturating_add(1)..u64::max_value(); + break; + }, + _ => (), + } + }, + } + } + } +} diff --git a/meilisearch-core/src/store/mod.rs b/meilisearch-core/src/store/mod.rs index 6448a3441..c172d3204 100644 --- a/meilisearch-core/src/store/mod.rs +++ b/meilisearch-core/src/store/mod.rs @@ -1,15 +1,16 @@ mod cow_set; mod docs_words; -mod prefix_documents_cache; -mod prefix_postings_lists_cache; +mod documents_ids; mod documents_fields; mod documents_fields_counts; +mod facets; mod main; mod postings_lists; +mod prefix_documents_cache; +mod prefix_postings_lists_cache; mod synonyms; mod updates; mod updates_results; -mod facets; pub use self::docs_words::DocsWords; pub use self::facets::Facets; From 016bfa391b2abaed6ff2ee44c975cd39b42fc2b5 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 19 May 2020 11:02:28 +0200 Subject: [PATCH 2/8] Introduce internal and user ids put and get methods --- meilisearch-core/src/store/main.rs | 51 +++++++++++++++++++++++------- meilisearch-core/src/store/mod.rs | 12 +++---- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/meilisearch-core/src/store/main.rs b/meilisearch-core/src/store/main.rs index 34a88afcd..9c3f89f39 100644 --- a/meilisearch-core/src/store/main.rs +++ b/meilisearch-core/src/store/main.rs @@ -3,29 +3,32 @@ use std::sync::Arc; use std::collections::HashMap; use chrono::{DateTime, Utc}; -use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str}; use heed::Result as ZResult; +use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str}; use meilisearch_schema::{FieldId, Schema}; +use meilisearch_types::DocumentId; use sdset::Set; use crate::database::MainT; use crate::RankedMap; use crate::settings::RankingRule; -use super::cow_set::CowSet; +use super::{CowSet, DocumentsIds}; -const CREATED_AT_KEY: &str = "created-at"; const ATTRIBUTES_FOR_FACETING: &str = "attributes-for-faceting"; -const RANKING_RULES_KEY: &str = "ranking-rules"; -const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute"; -const STOP_WORDS_KEY: &str = "stop-words"; -const SYNONYMS_KEY: &str = "synonyms"; +const CREATED_AT_KEY: &str = "created-at"; const CUSTOMS_KEY: &str = "customs"; +const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute"; const FIELDS_FREQUENCY_KEY: &str = "fields-frequency"; +const INTERNAL_IDS_KEY: &str = "internal-ids"; const NAME_KEY: &str = "name"; const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents"; const RANKED_MAP_KEY: &str = "ranked-map"; +const RANKING_RULES_KEY: &str = "ranking-rules"; const SCHEMA_KEY: &str = "schema"; +const STOP_WORDS_KEY: &str = "stop-words"; +const SYNONYMS_KEY: &str = "synonyms"; const UPDATED_AT_KEY: &str = "updated-at"; +const USER_IDS_KEY: &str = "user-ids"; const WORDS_KEY: &str = "words"; pub type FreqsMap = HashMap; @@ -71,9 +74,35 @@ impl Main { self.main.get::<_, Str, SerdeDatetime>(reader, UPDATED_AT_KEY) } + pub fn put_internal_ids(self, writer: &mut heed::RwTxn, ids: &sdset::Set) -> ZResult<()> { + self.main.put::<_, Str, DocumentsIds>(writer, INTERNAL_IDS_KEY, ids) + } + + pub fn internal_ids<'txn>(self, reader: &'txn heed::RoTxn) -> ZResult>> { + match self.main.get::<_, Str, DocumentsIds>(reader, INTERNAL_IDS_KEY)? { + Some(ids) => Ok(ids), + None => Ok(Cow::default()), + } + } + + pub fn put_user_ids(self, writer: &mut heed::RwTxn, ids: &fst::Map) -> ZResult<()> { + self.main.put::<_, Str, ByteSlice>(writer, USER_IDS_KEY, ids.as_fst().as_bytes()) + } + + pub fn user_ids(self, reader: &heed::RoTxn) -> ZResult { + match self.main.get::<_, Str, ByteSlice>(reader, USER_IDS_KEY)? { + Some(bytes) => { + let len = bytes.len(); + let bytes = Arc::new(bytes.to_owned()); + let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap(); + Ok(fst::Map::from(fst)) + }, + None => Ok(fst::Map::default()), + } + } + pub fn put_words_fst(self, writer: &mut heed::RwTxn, fst: &fst::Set) -> ZResult<()> { - let bytes = fst.as_fst().as_bytes(); - self.main.put::<_, Str, ByteSlice>(writer, WORDS_KEY, bytes) + self.main.put::<_, Str, ByteSlice>(writer, WORDS_KEY, fst.as_fst().as_bytes()) } pub unsafe fn static_words_fst(self, reader: &heed::RoTxn) -> ZResult> { @@ -82,7 +111,7 @@ impl Main { let bytes: &'static [u8] = std::mem::transmute(bytes); let set = fst::Set::from_static_slice(bytes).unwrap(); Ok(Some(set)) - } + }, None => Ok(None), } } @@ -94,7 +123,7 @@ impl Main { let bytes = Arc::new(bytes.to_owned()); let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap(); Ok(Some(fst::Set::from(fst))) - } + }, None => Ok(None), } } diff --git a/meilisearch-core/src/store/mod.rs b/meilisearch-core/src/store/mod.rs index c172d3204..8c200be22 100644 --- a/meilisearch-core/src/store/mod.rs +++ b/meilisearch-core/src/store/mod.rs @@ -12,16 +12,16 @@ mod synonyms; mod updates; mod updates_results; +pub use self::cow_set::CowSet; pub use self::docs_words::DocsWords; -pub use self::facets::Facets; -pub use self::prefix_documents_cache::PrefixDocumentsCache; -pub use self::prefix_postings_lists_cache::PrefixPostingsListsCache; pub use self::documents_fields::{DocumentFieldsIter, DocumentsFields}; -pub use self::documents_fields_counts::{ - DocumentFieldsCountsIter, DocumentsFieldsCounts, DocumentsIdsIter, -}; +pub use self::documents_fields_counts::{DocumentFieldsCountsIter, DocumentsFieldsCounts, DocumentsIdsIter}; +pub use self::documents_ids::{DocumentsIds, DiscoverIds}; +pub use self::facets::Facets; pub use self::main::Main; pub use self::postings_lists::PostingsLists; +pub use self::prefix_documents_cache::PrefixDocumentsCache; +pub use self::prefix_postings_lists_cache::PrefixPostingsListsCache; pub use self::synonyms::Synonyms; pub use self::updates::Updates; pub use self::updates_results::UpdatesResults; From 5bf15a419050ffae99d2f3179ce9c2d5c7e916b9 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 19 May 2020 11:45:46 +0200 Subject: [PATCH 3/8] Compute and merge discovered ids --- Cargo.lock | 1 - meilisearch-core/Cargo.toml | 1 - meilisearch-core/src/store/main.rs | 26 ++++++++++++ .../src/update/documents_addition.rs | 20 ++++++++-- .../src/update/documents_deletion.rs | 5 ++- meilisearch-core/src/update/helpers.rs | 40 +++++++++++++------ meilisearch-core/src/update/mod.rs | 2 +- 7 files changed, 75 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 899893044..3c0f0456e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1624,7 +1624,6 @@ dependencies = [ "sdset", "serde", "serde_json", - "siphasher", "slice-group-by", "structopt", "tempfile", diff --git a/meilisearch-core/Cargo.toml b/meilisearch-core/Cargo.toml index d9b88f89b..341c6336a 100644 --- a/meilisearch-core/Cargo.toml +++ b/meilisearch-core/Cargo.toml @@ -35,7 +35,6 @@ regex = "1.3.6" sdset = "0.4.0" serde = { version = "1.0.105", features = ["derive"] } serde_json = { version = "1.0.50", features = ["preserve_order"] } -siphasher = "0.3.2" slice-group-by = "0.2.6" unicase = "2.6.0" zerocopy = "0.3.0" diff --git a/meilisearch-core/src/store/main.rs b/meilisearch-core/src/store/main.rs index 9c3f89f39..0a23b82cb 100644 --- a/meilisearch-core/src/store/main.rs +++ b/meilisearch-core/src/store/main.rs @@ -85,10 +85,36 @@ impl Main { } } + pub fn merge_internal_ids(self, writer: &mut heed::RwTxn, new_ids: &sdset::Set) -> ZResult<()> { + use sdset::SetOperation; + + // We do an union of the old and new internal ids. + let internal_ids = self.internal_ids(writer)?; + let internal_ids = sdset::duo::Union::new(&new_ids, &internal_ids).into_set_buf(); + self.put_internal_ids(writer, &internal_ids) + } + pub fn put_user_ids(self, writer: &mut heed::RwTxn, ids: &fst::Map) -> ZResult<()> { self.main.put::<_, Str, ByteSlice>(writer, USER_IDS_KEY, ids.as_fst().as_bytes()) } + pub fn merge_user_ids(self, writer: &mut heed::RwTxn, new_ids: &fst::Map) -> ZResult<()> { + use fst::{Streamer, IntoStreamer}; + + let user_ids = self.user_ids(writer)?; + + // Do an union of the old and the new set of user ids. + let mut op = user_ids.op().add(new_ids.into_stream()).r#union(); + let mut build = fst::MapBuilder::memory(); + while let Some((userid, values)) = op.next() { + build.insert(userid, values[0].value).unwrap(); + } + let user_ids = build.into_inner().unwrap(); + + // TODO prefer using self.put_user_ids + self.main.put::<_, Str, ByteSlice>(writer, USER_IDS_KEY, user_ids.as_slice()) + } + pub fn user_ids(self, reader: &heed::RoTxn) -> ZResult { match self.main.get::<_, Str, ByteSlice>(reader, USER_IDS_KEY)? { Some(bytes) => { diff --git a/meilisearch-core/src/update/documents_addition.rs b/meilisearch-core/src/update/documents_addition.rs index d9d1af328..efafe3e1c 100644 --- a/meilisearch-core/src/update/documents_addition.rs +++ b/meilisearch-core/src/update/documents_addition.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{HashMap, BTreeMap}; use fst::{set::OpBuilder, SetBuilder}; use indexmap::IndexMap; @@ -13,7 +13,7 @@ use crate::database::{UpdateEvent, UpdateEventsEmitter}; use crate::facets; use crate::raw_indexer::RawIndexer; use crate::serde::Deserializer; -use crate::store::{self, DocumentsFields, DocumentsFieldsCounts}; +use crate::store::{self, DocumentsFields, DocumentsFieldsCounts, DiscoverIds}; use crate::update::helpers::{index_value, value_to_number, extract_document_id}; use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update}; use crate::{Error, MResult, RankedMap}; @@ -150,17 +150,26 @@ pub fn apply_addition<'a, 'b>( partial: bool ) -> MResult<()> { let mut documents_additions = HashMap::new(); + let mut new_user_ids = BTreeMap::new(); + let mut new_internal_ids = Vec::with_capacity(new_documents.len()); let mut schema = match index.main.schema(writer)? { Some(schema) => schema, None => return Err(Error::SchemaMissing), }; + // Retrieve the documents ids related structures + let user_ids = index.main.user_ids(writer)?; + let internal_ids = index.main.internal_ids(writer)?; + let mut available_ids = DiscoverIds::new(&internal_ids); + let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?; // 1. store documents ids for future deletion for mut document in new_documents { - let document_id = extract_document_id(&primary_key, &document)?; + let (document_id, userid) = extract_document_id(&primary_key, &document, &user_ids, &mut available_ids)?; + new_user_ids.insert(userid, document_id.0); + new_internal_ids.push(document_id); if partial { let mut deserializer = Deserializer { @@ -233,6 +242,11 @@ pub fn apply_addition<'a, 'b>( index.main.put_schema(writer, &schema)?; + let new_user_ids = fst::Map::from_iter(new_user_ids)?; + let new_internal_ids = sdset::SetBuf::from_dirty(new_internal_ids); + index.main.merge_user_ids(writer, &new_user_ids)?; + index.main.merge_internal_ids(writer, &new_internal_ids)?; + Ok(()) } diff --git a/meilisearch-core/src/update/documents_deletion.rs b/meilisearch-core/src/update/documents_deletion.rs index 4526d053d..bfca8b360 100644 --- a/meilisearch-core/src/update/documents_deletion.rs +++ b/meilisearch-core/src/update/documents_deletion.rs @@ -71,7 +71,10 @@ pub fn apply_documents_deletion( writer: &mut heed::RwTxn, index: &store::Index, deletion: Vec, -) -> MResult<()> { +) -> MResult<()> +{ + unimplemented!("When we delete documents we must ask for user ids instead of internal ones"); + let schema = match index.main.schema(writer)? { Some(schema) => schema, None => return Err(Error::SchemaMissing), diff --git a/meilisearch-core/src/update/helpers.rs b/meilisearch-core/src/update/helpers.rs index d17bea3b2..7f9a7b634 100644 --- a/meilisearch-core/src/update/helpers.rs +++ b/meilisearch-core/src/update/helpers.rs @@ -1,16 +1,15 @@ use std::fmt::Write as _; -use std::hash::{Hash, Hasher}; use indexmap::IndexMap; use meilisearch_schema::IndexedPos; use meilisearch_types::DocumentId; use ordered_float::OrderedFloat; use serde_json::Value; -use siphasher::sip::SipHasher; +use crate::Number; use crate::raw_indexer::RawIndexer; use crate::serde::SerializerError; -use crate::Number; +use crate::store::DiscoverIds; /// Returns the number of words indexed or `None` if the type is unindexable. pub fn index_value( @@ -96,28 +95,43 @@ pub fn value_to_number(value: &Value) -> Option { } } -/// Validates a string representation to be a correct document id and -/// returns the hash of the given type, this is the way we produce documents ids. -pub fn compute_document_id(string: &str) -> Result { - if string.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') { - let mut s = SipHasher::new(); - string.hash(&mut s); - Ok(DocumentId(s.finish())) +/// Validates a string representation to be a correct document id and returns +/// the corresponding id or generate a new one, this is the way we produce documents ids. +pub fn discover_document_id( + userid: &str, + user_ids: &fst::Map, + available_ids: &mut DiscoverIds<'_>, +) -> Result +{ + if userid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') { + match user_ids.get(userid) { + Some(internal_id) => Ok(DocumentId(internal_id)), + None => { + let internal_id = available_ids.next().expect("no more ids available"); + Ok(internal_id) + }, + } } else { Err(SerializerError::InvalidDocumentIdFormat) } } /// Extracts and validates the document id of a document. -pub fn extract_document_id(primary_key: &str, document: &IndexMap) -> Result { +pub fn extract_document_id( + primary_key: &str, + document: &IndexMap, + user_ids: &fst::Map, + available_ids: &mut DiscoverIds<'_>, +) -> Result<(DocumentId, String), SerializerError> +{ match document.get(primary_key) { Some(value) => { - let string = match value { + let userid = match value { Value::Number(number) => number.to_string(), Value::String(string) => string.clone(), _ => return Err(SerializerError::InvalidDocumentIdFormat), }; - compute_document_id(&string) + discover_document_id(&userid, user_ids, available_ids).map(|id| (id, userid)) } None => Err(SerializerError::DocumentIdNotFound), } diff --git a/meilisearch-core/src/update/mod.rs b/meilisearch-core/src/update/mod.rs index 5b7c33c9d..55bdc9edc 100644 --- a/meilisearch-core/src/update/mod.rs +++ b/meilisearch-core/src/update/mod.rs @@ -9,7 +9,7 @@ pub use self::clear_all::{apply_clear_all, push_clear_all}; pub use self::customs_update::{apply_customs_update, push_customs_update}; pub use self::documents_addition::{apply_documents_addition, apply_documents_partial_addition, DocumentsAddition}; pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion}; -pub use self::helpers::{index_value, value_to_string, value_to_number, compute_document_id, extract_document_id}; +pub use self::helpers::{index_value, value_to_string, value_to_number, discover_document_id, extract_document_id}; pub use self::settings_update::{apply_settings_update, push_settings_update}; use std::cmp; From 3bca31856d8b252ce207062a5cf7e2636bd95d18 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 19 May 2020 13:12:02 +0200 Subject: [PATCH 4/8] Discover and remove documents ids --- meilisearch-core/src/database.rs | 16 ++++----- meilisearch-core/src/store/main.rs | 33 +++++++++++++++-- meilisearch-core/src/update/clear_all.rs | 2 ++ .../src/update/documents_addition.rs | 4 +-- .../src/update/documents_deletion.rs | 36 +++++++++++++------ meilisearch-core/src/update/mod.rs | 6 ++-- meilisearch-http/src/routes/document.rs | 18 +++++----- 7 files changed, 82 insertions(+), 33 deletions(-) diff --git a/meilisearch-core/src/database.rs b/meilisearch-core/src/database.rs index d7f78dca1..5672c3c1e 100644 --- a/meilisearch-core/src/database.rs +++ b/meilisearch-core/src/database.rs @@ -775,12 +775,12 @@ mod tests { assert!(document.is_none()); let document: Option = index - .document(&reader, None, DocumentId(7_900_334_843_754_999_545)) + .document(&reader, None, DocumentId(0)) .unwrap(); assert!(document.is_some()); let document: Option = index - .document(&reader, None, DocumentId(8_367_468_610_878_465_872)) + .document(&reader, None, DocumentId(1)) .unwrap(); assert!(document.is_some()); } @@ -855,12 +855,12 @@ mod tests { assert!(document.is_none()); let document: Option = index - .document(&reader, None, DocumentId(7_900_334_843_754_999_545)) + .document(&reader, None, DocumentId(0)) .unwrap(); assert!(document.is_some()); let document: Option = index - .document(&reader, None, DocumentId(8_367_468_610_878_465_872)) + .document(&reader, None, DocumentId(1)) .unwrap(); assert!(document.is_some()); @@ -897,7 +897,7 @@ mod tests { let reader = db.main_read_txn().unwrap(); let document: Option = index - .document(&reader, None, DocumentId(7_900_334_843_754_999_545)) + .document(&reader, None, DocumentId(0)) .unwrap(); let new_doc1 = serde_json::json!({ @@ -908,7 +908,7 @@ mod tests { assert_eq!(document, Some(new_doc1)); let document: Option = index - .document(&reader, None, DocumentId(8_367_468_610_878_465_872)) + .document(&reader, None, DocumentId(1)) .unwrap(); let new_doc2 = serde_json::json!({ @@ -1080,14 +1080,14 @@ mod tests { assert_matches!( iter.next(), Some(Document { - id: DocumentId(7_900_334_843_754_999_545), + id: DocumentId(0), .. }) ); assert_matches!( iter.next(), Some(Document { - id: DocumentId(8_367_468_610_878_465_872), + id: DocumentId(1), .. }) ); diff --git a/meilisearch-core/src/store/main.rs b/meilisearch-core/src/store/main.rs index 0a23b82cb..1a7901d6f 100644 --- a/meilisearch-core/src/store/main.rs +++ b/meilisearch-core/src/store/main.rs @@ -90,7 +90,16 @@ impl Main { // We do an union of the old and new internal ids. let internal_ids = self.internal_ids(writer)?; - let internal_ids = sdset::duo::Union::new(&new_ids, &internal_ids).into_set_buf(); + let internal_ids = sdset::duo::Union::new(&internal_ids, new_ids).into_set_buf(); + self.put_internal_ids(writer, &internal_ids) + } + + pub fn remove_internal_ids(self, writer: &mut heed::RwTxn, ids: &sdset::Set) -> ZResult<()> { + use sdset::SetOperation; + + // We do a difference of the old and new internal ids. + let internal_ids = self.internal_ids(writer)?; + let internal_ids = sdset::duo::Difference::new(&internal_ids, ids).into_set_buf(); self.put_internal_ids(writer, &internal_ids) } @@ -101,10 +110,25 @@ impl Main { pub fn merge_user_ids(self, writer: &mut heed::RwTxn, new_ids: &fst::Map) -> ZResult<()> { use fst::{Streamer, IntoStreamer}; + // Do an union of the old and the new set of user ids. let user_ids = self.user_ids(writer)?; + let mut op = user_ids.op().add(new_ids.into_stream()).r#union(); + let mut build = fst::MapBuilder::memory(); + while let Some((userid, values)) = op.next() { + build.insert(userid, values[0].value).unwrap(); + } + let user_ids = build.into_inner().unwrap(); + + // TODO prefer using self.put_user_ids + self.main.put::<_, Str, ByteSlice>(writer, USER_IDS_KEY, user_ids.as_slice()) + } + + pub fn remove_user_ids(self, writer: &mut heed::RwTxn, ids: &fst::Map) -> ZResult<()> { + use fst::{Streamer, IntoStreamer}; // Do an union of the old and the new set of user ids. - let mut op = user_ids.op().add(new_ids.into_stream()).r#union(); + let user_ids = self.user_ids(writer)?; + let mut op = user_ids.op().add(ids.into_stream()).difference(); let mut build = fst::MapBuilder::memory(); while let Some((userid, values)) = op.next() { build.insert(userid, values[0].value).unwrap(); @@ -127,6 +151,11 @@ impl Main { } } + pub fn user_to_internal_id(self, reader: &heed::RoTxn, userid: &str) -> ZResult> { + let user_ids = self.user_ids(reader)?; + Ok(user_ids.get(userid).map(DocumentId)) + } + pub fn put_words_fst(self, writer: &mut heed::RwTxn, fst: &fst::Set) -> ZResult<()> { self.main.put::<_, Str, ByteSlice>(writer, WORDS_KEY, fst.as_fst().as_bytes()) } diff --git a/meilisearch-core/src/update/clear_all.rs b/meilisearch-core/src/update/clear_all.rs index 0c52f5190..c393b8a6c 100644 --- a/meilisearch-core/src/update/clear_all.rs +++ b/meilisearch-core/src/update/clear_all.rs @@ -7,6 +7,8 @@ pub fn apply_clear_all( index: &store::Index, ) -> MResult<()> { index.main.put_words_fst(writer, &fst::Set::default())?; + index.main.put_user_ids(writer, &fst::Map::default())?; + index.main.put_internal_ids(writer, &sdset::SetBuf::default())?; index.main.put_ranked_map(writer, &RankedMap::default())?; index.main.put_number_of_documents(writer, |_| 0)?; index.documents_fields.clear(writer)?; diff --git a/meilisearch-core/src/update/documents_addition.rs b/meilisearch-core/src/update/documents_addition.rs index efafe3e1c..d79446678 100644 --- a/meilisearch-core/src/update/documents_addition.rs +++ b/meilisearch-core/src/update/documents_addition.rs @@ -190,9 +190,9 @@ pub fn apply_addition<'a, 'b>( documents_additions.insert(document_id, document); } - // 2. remove the documents posting lists + // 2. remove the documents postings lists let number_of_inserted_documents = documents_additions.len(); - let documents_ids = documents_additions.iter().map(|(id, _)| *id).collect(); + let documents_ids = new_user_ids.iter().map(|(userid, _)| userid.clone()).collect(); apply_documents_deletion(writer, index, documents_ids)?; let mut ranked_map = match index.main.ranked_map(writer)? { diff --git a/meilisearch-core/src/update/documents_deletion.rs b/meilisearch-core/src/update/documents_deletion.rs index bfca8b360..369e6901c 100644 --- a/meilisearch-core/src/update/documents_deletion.rs +++ b/meilisearch-core/src/update/documents_deletion.rs @@ -14,7 +14,7 @@ pub struct DocumentsDeletion { updates_store: store::Updates, updates_results_store: store::UpdatesResults, updates_notifier: UpdateEventsEmitter, - documents: Vec, + documents: Vec, } impl DocumentsDeletion { @@ -31,7 +31,7 @@ impl DocumentsDeletion { } } - pub fn delete_document_by_id(&mut self, document_id: DocumentId) { + pub fn delete_document_by_user_id(&mut self, document_id: String) { self.documents.push(document_id); } @@ -47,8 +47,8 @@ impl DocumentsDeletion { } } -impl Extend for DocumentsDeletion { - fn extend>(&mut self, iter: T) { +impl Extend for DocumentsDeletion { + fn extend>(&mut self, iter: T) { self.documents.extend(iter) } } @@ -57,7 +57,7 @@ pub fn push_documents_deletion( writer: &mut heed::RwTxn, updates_store: store::Updates, updates_results_store: store::UpdatesResults, - deletion: Vec, + deletion: Vec, ) -> MResult { let last_update_id = next_update_id(writer, updates_store, updates_results_store)?; @@ -70,10 +70,23 @@ pub fn push_documents_deletion( pub fn apply_documents_deletion( writer: &mut heed::RwTxn, index: &store::Index, - deletion: Vec, + deletion: Vec, ) -> MResult<()> { - unimplemented!("When we delete documents we must ask for user ids instead of internal ones"); + let (user_ids, internal_ids) = { + let new_user_ids = SetBuf::from_dirty(deletion); + let mut internal_ids = Vec::new(); + + let user_ids = index.main.user_ids(writer)?; + for userid in new_user_ids.as_slice() { + if let Some(id) = user_ids.get(userid) { + internal_ids.push(DocumentId(id)); + } + } + + let new_user_ids = fst::Map::from_iter(new_user_ids.into_iter().map(|k| (k, 0))).unwrap(); + (new_user_ids, SetBuf::from_dirty(internal_ids)) + }; let schema = match index.main.schema(writer)? { Some(schema) => schema, @@ -87,16 +100,15 @@ pub fn apply_documents_deletion( // facet filters deletion if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? { - let facet_map = facets::facet_map_from_docids(writer, &index, &deletion, &attributes_for_facetting)?; + let facet_map = facets::facet_map_from_docids(writer, &index, &internal_ids, &attributes_for_facetting)?; index.facets.remove(writer, facet_map)?; } // collect the ranked attributes according to the schema let ranked_fields = schema.ranked(); - let idset = SetBuf::from_dirty(deletion); let mut words_document_ids = HashMap::new(); - for id in idset { + for id in internal_ids.iter().cloned() { // remove all the ranked attributes from the ranked_map for ranked_attr in ranked_fields { ranked_map.remove(id, *ranked_attr); @@ -166,6 +178,10 @@ pub fn apply_documents_deletion( index.main.put_ranked_map(writer, &ranked_map)?; index.main.put_number_of_documents(writer, |old| old - deleted_documents_len)?; + // We apply the changes to the user and internal ids + index.main.remove_user_ids(writer, &user_ids)?; + index.main.remove_internal_ids(writer, &internal_ids)?; + compute_short_prefixes(writer, index)?; Ok(()) diff --git a/meilisearch-core/src/update/mod.rs b/meilisearch-core/src/update/mod.rs index 55bdc9edc..d2d771030 100644 --- a/meilisearch-core/src/update/mod.rs +++ b/meilisearch-core/src/update/mod.rs @@ -24,7 +24,7 @@ use sdset::Set; use serde::{Deserialize, Serialize}; use serde_json::Value; -use crate::{store, DocumentId, MResult}; +use crate::{store, MResult}; use crate::database::{MainT, UpdateT}; use crate::settings::SettingsUpdate; @@ -63,7 +63,7 @@ impl Update { } } - fn documents_deletion(data: Vec) -> Update { + fn documents_deletion(data: Vec) -> Update { Update { data: UpdateData::DocumentsDeletion(data), enqueued_at: Utc::now(), @@ -84,7 +84,7 @@ pub enum UpdateData { Customs(Vec), DocumentsAddition(Vec>), DocumentsPartial(Vec>), - DocumentsDeletion(Vec), + DocumentsDeletion(Vec), Settings(SettingsUpdate) } diff --git a/meilisearch-http/src/routes/document.rs b/meilisearch-http/src/routes/document.rs index 3b71e656a..d165b5b39 100644 --- a/meilisearch-http/src/routes/document.rs +++ b/meilisearch-http/src/routes/document.rs @@ -3,7 +3,7 @@ use std::collections::{BTreeSet, HashSet}; use actix_web::{web, HttpResponse}; use actix_web_macros::{delete, get, post, put}; use indexmap::IndexMap; -use meilisearch_core::{update, Error}; +use meilisearch_core::update; use serde::Deserialize; use serde_json::Value; @@ -43,11 +43,16 @@ async fn get_document( .open_index(&path.index_uid) .ok_or(ResponseError::index_not_found(&path.index_uid))?; - let document_id = update::compute_document_id(&path.document_id).map_err(Error::Serializer)?; let reader = data.db.main_read_txn()?; + let internal_id = index.main.user_to_internal_id(&reader, &path.document_id)?; + + let internal_id = match internal_id { + Some(internal_id) => internal_id, + None => return Err(ResponseError::document_not_found(&path.document_id)), + }; let response: Document = index - .document(&reader, None, document_id)? + .document(&reader, None, internal_id)? .ok_or(ResponseError::document_not_found(&path.document_id))?; Ok(HttpResponse::Ok().json(response)) @@ -66,12 +71,10 @@ async fn delete_document( .open_index(&path.index_uid) .ok_or(ResponseError::index_not_found(&path.index_uid))?; - let document_id = update::compute_document_id(&path.document_id).map_err(Error::Serializer)?; - let mut update_writer = data.db.update_write_txn()?; let mut documents_deletion = index.documents_deletion(); - documents_deletion.delete_document_by_id(document_id); + documents_deletion.delete_document_by_user_id(path.document_id.clone()); let update_id = documents_deletion.finalize(&mut update_writer)?; @@ -239,8 +242,7 @@ async fn delete_documents( for document_id in body.into_inner() { let document_id = update::value_to_string(&document_id); - let document_id = update::compute_document_id(&document_id).map_err(Error::Serializer)?; - documents_deletion.delete_document_by_id(document_id); + documents_deletion.delete_document_by_user_id(document_id); } let update_id = documents_deletion.finalize(&mut writer)?; From 788e2202c9ac6e2ac685b82b711819c5a4bb5df3 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 19 May 2020 13:53:31 +0200 Subject: [PATCH 5/8] Reduce the DocumentId size from 64 to 32bits --- meilisearch-core/src/lib.rs | 2 +- meilisearch-core/src/query_builder.rs | 4 ++-- meilisearch-core/src/store/docs_words.rs | 10 +++++----- meilisearch-core/src/store/documents_ids.rs | 14 +++++++------- meilisearch-core/src/store/main.rs | 2 +- meilisearch-core/src/store/mod.rs | 11 ++++++----- .../src/store/prefix_documents_cache.rs | 10 +++++----- meilisearch-core/src/update/documents_addition.rs | 2 +- meilisearch-core/src/update/documents_deletion.rs | 2 +- meilisearch-core/src/update/helpers.rs | 2 +- meilisearch-http/src/error.rs | 4 ++-- meilisearch-types/src/lib.rs | 2 +- 12 files changed, 33 insertions(+), 32 deletions(-) diff --git a/meilisearch-core/src/lib.rs b/meilisearch-core/src/lib.rs index 38a1a3c34..1a4b0d2e6 100644 --- a/meilisearch-core/src/lib.rs +++ b/meilisearch-core/src/lib.rs @@ -191,6 +191,6 @@ mod tests { #[test] fn docindex_mem_size() { - assert_eq!(mem::size_of::(), 16); + assert_eq!(mem::size_of::(), 12); } } diff --git a/meilisearch-core/src/query_builder.rs b/meilisearch-core/src/query_builder.rs index 1f1ef28e3..d1b35273d 100644 --- a/meilisearch-core/src/query_builder.rs +++ b/meilisearch-core/src/query_builder.rs @@ -228,7 +228,7 @@ mod tests { builder.into_inner().and_then(Set::from_bytes).unwrap() } - const fn doc_index(document_id: u64, word_index: u16) -> DocIndex { + const fn doc_index(document_id: u32, word_index: u16) -> DocIndex { DocIndex { document_id: DocumentId(document_id), attribute: 0, @@ -238,7 +238,7 @@ mod tests { } } - const fn doc_char_index(document_id: u64, word_index: u16, char_index: u16) -> DocIndex { + const fn doc_char_index(document_id: u32, word_index: u16, char_index: u16) -> DocIndex { DocIndex { document_id: DocumentId(document_id), attribute: 0, diff --git a/meilisearch-core/src/store/docs_words.rs b/meilisearch-core/src/store/docs_words.rs index 0ae153b3f..1e33a9bb0 100644 --- a/meilisearch-core/src/store/docs_words.rs +++ b/meilisearch-core/src/store/docs_words.rs @@ -1,4 +1,4 @@ -use super::BEU64; +use super::BEU32; use crate::database::MainT; use crate::DocumentId; use heed::types::{ByteSlice, OwnedType}; @@ -7,7 +7,7 @@ use std::sync::Arc; #[derive(Copy, Clone)] pub struct DocsWords { - pub(crate) docs_words: heed::Database, ByteSlice>, + pub(crate) docs_words: heed::Database, ByteSlice>, } impl DocsWords { @@ -17,13 +17,13 @@ impl DocsWords { document_id: DocumentId, words: &fst::Set, ) -> ZResult<()> { - let document_id = BEU64::new(document_id.0); + let document_id = BEU32::new(document_id.0); let bytes = words.as_fst().as_bytes(); self.docs_words.put(writer, &document_id, bytes) } pub fn del_doc_words(self, writer: &mut heed::RwTxn, document_id: DocumentId) -> ZResult { - let document_id = BEU64::new(document_id.0); + let document_id = BEU32::new(document_id.0); self.docs_words.delete(writer, &document_id) } @@ -36,7 +36,7 @@ impl DocsWords { reader: &heed::RoTxn, document_id: DocumentId, ) -> ZResult> { - let document_id = BEU64::new(document_id.0); + let document_id = BEU32::new(document_id.0); match self.docs_words.get(reader, &document_id)? { Some(bytes) => { let len = bytes.len(); diff --git a/meilisearch-core/src/store/documents_ids.rs b/meilisearch-core/src/store/documents_ids.rs index d9d80d33c..a0628e9e2 100644 --- a/meilisearch-core/src/store/documents_ids.rs +++ b/meilisearch-core/src/store/documents_ids.rs @@ -26,16 +26,16 @@ impl<'a> BytesDecode<'a> for DocumentsIds { pub struct DiscoverIds<'a> { ids_iter: std::slice::Iter<'a, DocumentId>, - left_id: Option, - right_id: Option, - available_range: std::ops::Range, + left_id: Option, + right_id: Option, + available_range: std::ops::Range, } impl DiscoverIds<'_> { pub fn new(ids: &Set) -> DiscoverIds { let mut ids_iter = ids.iter(); let right_id = ids_iter.next().map(|id| id.0); - let available_range = 0..right_id.unwrap_or(u64::max_value()); + let available_range = 0..right_id.unwrap_or(u32::max_value()); DiscoverIds { ids_iter, left_id: None, right_id, available_range } } } @@ -49,7 +49,7 @@ impl Iterator for DiscoverIds<'_> { // The available range gives us a new id, we return it. Some(id) => return Some(DocumentId(id)), // The available range is exhausted, we need to find the next one. - None if self.available_range.end == u64::max_value() => return None, + None if self.available_range.end == u32::max_value() => return None, None => loop { self.left_id = self.right_id.take(); self.right_id = self.ids_iter.next().map(|id| id.0); @@ -61,9 +61,9 @@ impl Iterator for DiscoverIds<'_> { break; }, // The last used id has been reached, we can use all ids - // until u64 MAX + // until u32 MAX (Some(l), None) => { - self.available_range = l.saturating_add(1)..u64::max_value(); + self.available_range = l.saturating_add(1)..u32::max_value(); break; }, _ => (), diff --git a/meilisearch-core/src/store/main.rs b/meilisearch-core/src/store/main.rs index 1a7901d6f..3daf4c7f9 100644 --- a/meilisearch-core/src/store/main.rs +++ b/meilisearch-core/src/store/main.rs @@ -153,7 +153,7 @@ impl Main { pub fn user_to_internal_id(self, reader: &heed::RoTxn, userid: &str) -> ZResult> { let user_ids = self.user_ids(reader)?; - Ok(user_ids.get(userid).map(DocumentId)) + Ok(user_ids.get(userid).map(|id| DocumentId(id as u32))) } pub fn put_words_fst(self, writer: &mut heed::RwTxn, fst: &fst::Set) -> ZResult<()> { diff --git a/meilisearch-core/src/store/mod.rs b/meilisearch-core/src/store/mod.rs index 8c200be22..d19d3f7d3 100644 --- a/meilisearch-core/src/store/mod.rs +++ b/meilisearch-core/src/store/mod.rs @@ -45,20 +45,21 @@ use crate::serde::Deserializer; use crate::settings::SettingsUpdate; use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MResult}; +type BEU32 = zerocopy::U32; type BEU64 = zerocopy::U64; pub type BEU16 = zerocopy::U16; #[derive(Debug, Copy, Clone, AsBytes, FromBytes)] #[repr(C)] pub struct DocumentFieldIndexedKey { - docid: BEU64, + docid: BEU32, indexed_pos: BEU16, } impl DocumentFieldIndexedKey { fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentFieldIndexedKey { DocumentFieldIndexedKey { - docid: BEU64::new(docid.0), + docid: BEU32::new(docid.0), indexed_pos: BEU16::new(indexed_pos.0), } } @@ -67,14 +68,14 @@ impl DocumentFieldIndexedKey { #[derive(Debug, Copy, Clone, AsBytes, FromBytes)] #[repr(C)] pub struct DocumentFieldStoredKey { - docid: BEU64, + docid: BEU32, field_id: BEU16, } impl DocumentFieldStoredKey { fn new(docid: DocumentId, field_id: FieldId) -> DocumentFieldStoredKey { DocumentFieldStoredKey { - docid: BEU64::new(docid.0), + docid: BEU32::new(docid.0), field_id: BEU16::new(field_id.0), } } @@ -98,7 +99,7 @@ impl<'a> BytesEncode<'a> for PostingsCodec { let mut buffer = Vec::with_capacity(u64_size + docids_size + matches_size); - let docids_len = item.docids.len(); + let docids_len = item.docids.len() as u64; buffer.extend_from_slice(&docids_len.to_be_bytes()); buffer.extend_from_slice(item.docids.as_bytes()); buffer.extend_from_slice(item.matches.as_bytes()); diff --git a/meilisearch-core/src/store/prefix_documents_cache.rs b/meilisearch-core/src/store/prefix_documents_cache.rs index 8cde30e95..2bb8700dc 100644 --- a/meilisearch-core/src/store/prefix_documents_cache.rs +++ b/meilisearch-core/src/store/prefix_documents_cache.rs @@ -4,7 +4,7 @@ use heed::types::{OwnedType, CowSlice}; use heed::Result as ZResult; use zerocopy::{AsBytes, FromBytes}; -use super::BEU64; +use super::{BEU64, BEU32}; use crate::{DocumentId, Highlight}; use crate::database::MainT; @@ -13,15 +13,15 @@ use crate::database::MainT; pub struct PrefixKey { prefix: [u8; 4], index: BEU64, - docid: BEU64, + docid: BEU32, } impl PrefixKey { - pub fn new(prefix: [u8; 4], index: u64, docid: u64) -> PrefixKey { + pub fn new(prefix: [u8; 4], index: u64, docid: u32) -> PrefixKey { PrefixKey { prefix, index: BEU64::new(index), - docid: BEU64::new(docid), + docid: BEU32::new(docid), } } } @@ -54,7 +54,7 @@ impl PrefixDocumentsCache { prefix: [u8; 4], ) -> ZResult> { let start = PrefixKey::new(prefix, 0, 0); - let end = PrefixKey::new(prefix, u64::max_value(), u64::max_value()); + let end = PrefixKey::new(prefix, u64::max_value(), u32::max_value()); let iter = self.prefix_documents_cache.range(reader, &(start..=end))?; Ok(PrefixDocumentsIter { iter }) } diff --git a/meilisearch-core/src/update/documents_addition.rs b/meilisearch-core/src/update/documents_addition.rs index d79446678..bd0945516 100644 --- a/meilisearch-core/src/update/documents_addition.rs +++ b/meilisearch-core/src/update/documents_addition.rs @@ -242,7 +242,7 @@ pub fn apply_addition<'a, 'b>( index.main.put_schema(writer, &schema)?; - let new_user_ids = fst::Map::from_iter(new_user_ids)?; + let new_user_ids = fst::Map::from_iter(new_user_ids.iter().map(|(u, i)| (u, *i as u64)))?; let new_internal_ids = sdset::SetBuf::from_dirty(new_internal_ids); index.main.merge_user_ids(writer, &new_user_ids)?; index.main.merge_internal_ids(writer, &new_internal_ids)?; diff --git a/meilisearch-core/src/update/documents_deletion.rs b/meilisearch-core/src/update/documents_deletion.rs index 369e6901c..cba19ed1a 100644 --- a/meilisearch-core/src/update/documents_deletion.rs +++ b/meilisearch-core/src/update/documents_deletion.rs @@ -80,7 +80,7 @@ pub fn apply_documents_deletion( let user_ids = index.main.user_ids(writer)?; for userid in new_user_ids.as_slice() { if let Some(id) = user_ids.get(userid) { - internal_ids.push(DocumentId(id)); + internal_ids.push(DocumentId(id as u32)); } } diff --git a/meilisearch-core/src/update/helpers.rs b/meilisearch-core/src/update/helpers.rs index 7f9a7b634..d7207e54c 100644 --- a/meilisearch-core/src/update/helpers.rs +++ b/meilisearch-core/src/update/helpers.rs @@ -105,7 +105,7 @@ pub fn discover_document_id( { if userid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') { match user_ids.get(userid) { - Some(internal_id) => Ok(DocumentId(internal_id)), + Some(id) => Ok(DocumentId(id as u32)), None => { let internal_id = available_ids.next().expect("no more ids available"); Ok(internal_id) diff --git a/meilisearch-http/src/error.rs b/meilisearch-http/src/error.rs index 295931454..27626c80b 100644 --- a/meilisearch-http/src/error.rs +++ b/meilisearch-http/src/error.rs @@ -22,7 +22,7 @@ pub enum ResponseError { NotFound(String), OpenIndex(String), FilterParsing(String), - RetrieveDocument(u64, String), + RetrieveDocument(u32, String), SearchDocuments(String), PayloadTooLarge, UnsupportedMediaType, @@ -116,7 +116,7 @@ impl ResponseError { ResponseError::Maintenance } - pub fn retrieve_document(doc_id: u64, err: impl fmt::Display) -> ResponseError { + pub fn retrieve_document(doc_id: u32, err: impl fmt::Display) -> ResponseError { ResponseError::RetrieveDocument(doc_id, err.to_string()) } diff --git a/meilisearch-types/src/lib.rs b/meilisearch-types/src/lib.rs index fac361415..3e14521e0 100644 --- a/meilisearch-types/src/lib.rs +++ b/meilisearch-types/src/lib.rs @@ -12,7 +12,7 @@ use serde::{Deserialize, Serialize}; #[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[repr(C)] -pub struct DocumentId(pub u64); +pub struct DocumentId(pub u32); /// This structure represent the position of a word /// in a document and its attributes. From 7bbb10155542a2b5f84d348db43869526c287f1f Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 20 May 2020 10:33:01 +0200 Subject: [PATCH 6/8] Prefix the attributes_for_faceting key name --- meilisearch-core/src/store/main.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/meilisearch-core/src/store/main.rs b/meilisearch-core/src/store/main.rs index 3daf4c7f9..b94f4a331 100644 --- a/meilisearch-core/src/store/main.rs +++ b/meilisearch-core/src/store/main.rs @@ -14,7 +14,7 @@ use crate::RankedMap; use crate::settings::RankingRule; use super::{CowSet, DocumentsIds}; -const ATTRIBUTES_FOR_FACETING: &str = "attributes-for-faceting"; +const ATTRIBUTES_FOR_FACETING_KEY: &str = "attributes-for-faceting"; const CREATED_AT_KEY: &str = "created-at"; const CUSTOMS_KEY: &str = "customs"; const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute"; @@ -277,15 +277,15 @@ impl Main { } pub fn attributes_for_faceting<'txn>(&self, reader: &'txn heed::RoTxn) -> ZResult>>> { - self.main.get::<_, Str, CowSet>(reader, ATTRIBUTES_FOR_FACETING) + self.main.get::<_, Str, CowSet>(reader, ATTRIBUTES_FOR_FACETING_KEY) } pub fn put_attributes_for_faceting(self, writer: &mut heed::RwTxn, attributes: &Set) -> ZResult<()> { - self.main.put::<_, Str, CowSet>(writer, ATTRIBUTES_FOR_FACETING, attributes) + self.main.put::<_, Str, CowSet>(writer, ATTRIBUTES_FOR_FACETING_KEY, attributes) } pub fn delete_attributes_for_faceting(self, writer: &mut heed::RwTxn) -> ZResult { - self.main.delete::<_, Str>(writer, ATTRIBUTES_FOR_FACETING) + self.main.delete::<_, Str>(writer, ATTRIBUTES_FOR_FACETING_KEY) } pub fn ranking_rules(&self, reader: &heed::RoTxn) -> ZResult>> { From a60e3fb1cb542d763029eb8035f188cd215e976c Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 20 May 2020 14:49:41 +0200 Subject: [PATCH 7/8] Rename user ids into external docids --- meilisearch-core/src/store/main.rs | 64 +++++++++---------- meilisearch-core/src/update/clear_all.rs | 4 +- .../src/update/documents_addition.rs | 26 ++++---- .../src/update/documents_deletion.rs | 26 ++++---- meilisearch-http/src/routes/document.rs | 6 +- 5 files changed, 63 insertions(+), 63 deletions(-) diff --git a/meilisearch-core/src/store/main.rs b/meilisearch-core/src/store/main.rs index b94f4a331..97fd5c707 100644 --- a/meilisearch-core/src/store/main.rs +++ b/meilisearch-core/src/store/main.rs @@ -18,8 +18,9 @@ const ATTRIBUTES_FOR_FACETING_KEY: &str = "attributes-for-faceting"; const CREATED_AT_KEY: &str = "created-at"; const CUSTOMS_KEY: &str = "customs"; const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute"; +const EXTERNAL_DOCIDS_KEY: &str = "external-docids"; const FIELDS_FREQUENCY_KEY: &str = "fields-frequency"; -const INTERNAL_IDS_KEY: &str = "internal-ids"; +const INTERNAL_DOCIDS_KEY: &str = "internal-docids"; const NAME_KEY: &str = "name"; const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents"; const RANKED_MAP_KEY: &str = "ranked-map"; @@ -28,7 +29,6 @@ const SCHEMA_KEY: &str = "schema"; const STOP_WORDS_KEY: &str = "stop-words"; const SYNONYMS_KEY: &str = "synonyms"; const UPDATED_AT_KEY: &str = "updated-at"; -const USER_IDS_KEY: &str = "user-ids"; const WORDS_KEY: &str = "words"; pub type FreqsMap = HashMap; @@ -74,73 +74,73 @@ impl Main { self.main.get::<_, Str, SerdeDatetime>(reader, UPDATED_AT_KEY) } - pub fn put_internal_ids(self, writer: &mut heed::RwTxn, ids: &sdset::Set) -> ZResult<()> { - self.main.put::<_, Str, DocumentsIds>(writer, INTERNAL_IDS_KEY, ids) + pub fn put_internal_docids(self, writer: &mut heed::RwTxn, ids: &sdset::Set) -> ZResult<()> { + self.main.put::<_, Str, DocumentsIds>(writer, INTERNAL_DOCIDS_KEY, ids) } - pub fn internal_ids<'txn>(self, reader: &'txn heed::RoTxn) -> ZResult>> { - match self.main.get::<_, Str, DocumentsIds>(reader, INTERNAL_IDS_KEY)? { + pub fn internal_docids<'txn>(self, reader: &'txn heed::RoTxn) -> ZResult>> { + match self.main.get::<_, Str, DocumentsIds>(reader, INTERNAL_DOCIDS_KEY)? { Some(ids) => Ok(ids), None => Ok(Cow::default()), } } - pub fn merge_internal_ids(self, writer: &mut heed::RwTxn, new_ids: &sdset::Set) -> ZResult<()> { + pub fn merge_internal_docids(self, writer: &mut heed::RwTxn, new_ids: &sdset::Set) -> ZResult<()> { use sdset::SetOperation; // We do an union of the old and new internal ids. - let internal_ids = self.internal_ids(writer)?; - let internal_ids = sdset::duo::Union::new(&internal_ids, new_ids).into_set_buf(); - self.put_internal_ids(writer, &internal_ids) + let internal_docids = self.internal_docids(writer)?; + let internal_docids = sdset::duo::Union::new(&internal_docids, new_ids).into_set_buf(); + self.put_internal_docids(writer, &internal_docids) } - pub fn remove_internal_ids(self, writer: &mut heed::RwTxn, ids: &sdset::Set) -> ZResult<()> { + pub fn remove_internal_docids(self, writer: &mut heed::RwTxn, ids: &sdset::Set) -> ZResult<()> { use sdset::SetOperation; // We do a difference of the old and new internal ids. - let internal_ids = self.internal_ids(writer)?; - let internal_ids = sdset::duo::Difference::new(&internal_ids, ids).into_set_buf(); - self.put_internal_ids(writer, &internal_ids) + let internal_docids = self.internal_docids(writer)?; + let internal_docids = sdset::duo::Difference::new(&internal_docids, ids).into_set_buf(); + self.put_internal_docids(writer, &internal_docids) } - pub fn put_user_ids(self, writer: &mut heed::RwTxn, ids: &fst::Map) -> ZResult<()> { - self.main.put::<_, Str, ByteSlice>(writer, USER_IDS_KEY, ids.as_fst().as_bytes()) + pub fn put_external_docids(self, writer: &mut heed::RwTxn, ids: &fst::Map) -> ZResult<()> { + self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, ids.as_fst().as_bytes()) } - pub fn merge_user_ids(self, writer: &mut heed::RwTxn, new_ids: &fst::Map) -> ZResult<()> { + pub fn merge_external_docids(self, writer: &mut heed::RwTxn, new_ids: &fst::Map) -> ZResult<()> { use fst::{Streamer, IntoStreamer}; // Do an union of the old and the new set of user ids. - let user_ids = self.user_ids(writer)?; - let mut op = user_ids.op().add(new_ids.into_stream()).r#union(); + let external_docids = self.external_docids(writer)?; + let mut op = external_docids.op().add(new_ids.into_stream()).r#union(); let mut build = fst::MapBuilder::memory(); while let Some((userid, values)) = op.next() { build.insert(userid, values[0].value).unwrap(); } - let user_ids = build.into_inner().unwrap(); + let external_docids = build.into_inner().unwrap(); // TODO prefer using self.put_user_ids - self.main.put::<_, Str, ByteSlice>(writer, USER_IDS_KEY, user_ids.as_slice()) + self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, external_docids.as_slice()) } - pub fn remove_user_ids(self, writer: &mut heed::RwTxn, ids: &fst::Map) -> ZResult<()> { + pub fn remove_external_docids(self, writer: &mut heed::RwTxn, ids: &fst::Map) -> ZResult<()> { use fst::{Streamer, IntoStreamer}; // Do an union of the old and the new set of user ids. - let user_ids = self.user_ids(writer)?; - let mut op = user_ids.op().add(ids.into_stream()).difference(); + let external_docids = self.external_docids(writer)?; + let mut op = external_docids.op().add(ids.into_stream()).difference(); let mut build = fst::MapBuilder::memory(); while let Some((userid, values)) = op.next() { build.insert(userid, values[0].value).unwrap(); } - let user_ids = build.into_inner().unwrap(); + let external_docids = build.into_inner().unwrap(); - // TODO prefer using self.put_user_ids - self.main.put::<_, Str, ByteSlice>(writer, USER_IDS_KEY, user_ids.as_slice()) + // TODO prefer using self.put_external_docids + self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, external_docids.as_slice()) } - pub fn user_ids(self, reader: &heed::RoTxn) -> ZResult { - match self.main.get::<_, Str, ByteSlice>(reader, USER_IDS_KEY)? { + pub fn external_docids(self, reader: &heed::RoTxn) -> ZResult { + match self.main.get::<_, Str, ByteSlice>(reader, EXTERNAL_DOCIDS_KEY)? { Some(bytes) => { let len = bytes.len(); let bytes = Arc::new(bytes.to_owned()); @@ -151,9 +151,9 @@ impl Main { } } - pub fn user_to_internal_id(self, reader: &heed::RoTxn, userid: &str) -> ZResult> { - let user_ids = self.user_ids(reader)?; - Ok(user_ids.get(userid).map(|id| DocumentId(id as u32))) + pub fn external_to_internal_docid(self, reader: &heed::RoTxn, external_docid: &str) -> ZResult> { + let external_ids = self.external_docids(reader)?; + Ok(external_ids.get(external_docid).map(|id| DocumentId(id as u32))) } pub fn put_words_fst(self, writer: &mut heed::RwTxn, fst: &fst::Set) -> ZResult<()> { diff --git a/meilisearch-core/src/update/clear_all.rs b/meilisearch-core/src/update/clear_all.rs index c393b8a6c..a4c5780b9 100644 --- a/meilisearch-core/src/update/clear_all.rs +++ b/meilisearch-core/src/update/clear_all.rs @@ -7,8 +7,8 @@ pub fn apply_clear_all( index: &store::Index, ) -> MResult<()> { index.main.put_words_fst(writer, &fst::Set::default())?; - index.main.put_user_ids(writer, &fst::Map::default())?; - index.main.put_internal_ids(writer, &sdset::SetBuf::default())?; + index.main.put_external_docids(writer, &fst::Map::default())?; + index.main.put_internal_docids(writer, &sdset::SetBuf::default())?; index.main.put_ranked_map(writer, &RankedMap::default())?; index.main.put_number_of_documents(writer, |_| 0)?; index.documents_fields.clear(writer)?; diff --git a/meilisearch-core/src/update/documents_addition.rs b/meilisearch-core/src/update/documents_addition.rs index bd0945516..c63ce5b2a 100644 --- a/meilisearch-core/src/update/documents_addition.rs +++ b/meilisearch-core/src/update/documents_addition.rs @@ -150,8 +150,8 @@ pub fn apply_addition<'a, 'b>( partial: bool ) -> MResult<()> { let mut documents_additions = HashMap::new(); - let mut new_user_ids = BTreeMap::new(); - let mut new_internal_ids = Vec::with_capacity(new_documents.len()); + let mut new_external_docids = BTreeMap::new(); + let mut new_internal_docids = Vec::with_capacity(new_documents.len()); let mut schema = match index.main.schema(writer)? { Some(schema) => schema, @@ -159,17 +159,17 @@ pub fn apply_addition<'a, 'b>( }; // Retrieve the documents ids related structures - let user_ids = index.main.user_ids(writer)?; - let internal_ids = index.main.internal_ids(writer)?; - let mut available_ids = DiscoverIds::new(&internal_ids); + let external_docids = index.main.external_docids(writer)?; + let internal_docids = index.main.internal_docids(writer)?; + let mut available_ids = DiscoverIds::new(&internal_docids); let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?; // 1. store documents ids for future deletion for mut document in new_documents { - let (document_id, userid) = extract_document_id(&primary_key, &document, &user_ids, &mut available_ids)?; - new_user_ids.insert(userid, document_id.0); - new_internal_ids.push(document_id); + let (document_id, userid) = extract_document_id(&primary_key, &document, &external_docids, &mut available_ids)?; + new_external_docids.insert(userid, document_id.0); + new_internal_docids.push(document_id); if partial { let mut deserializer = Deserializer { @@ -192,7 +192,7 @@ pub fn apply_addition<'a, 'b>( // 2. remove the documents postings lists let number_of_inserted_documents = documents_additions.len(); - let documents_ids = new_user_ids.iter().map(|(userid, _)| userid.clone()).collect(); + let documents_ids = new_external_docids.iter().map(|(id, _)| id.clone()).collect(); apply_documents_deletion(writer, index, documents_ids)?; let mut ranked_map = match index.main.ranked_map(writer)? { @@ -242,10 +242,10 @@ pub fn apply_addition<'a, 'b>( index.main.put_schema(writer, &schema)?; - let new_user_ids = fst::Map::from_iter(new_user_ids.iter().map(|(u, i)| (u, *i as u64)))?; - let new_internal_ids = sdset::SetBuf::from_dirty(new_internal_ids); - index.main.merge_user_ids(writer, &new_user_ids)?; - index.main.merge_internal_ids(writer, &new_internal_ids)?; + let new_external_docids = fst::Map::from_iter(new_external_docids.iter().map(|(u, i)| (u, *i as u64)))?; + let new_internal_docids = sdset::SetBuf::from_dirty(new_internal_docids); + index.main.merge_external_docids(writer, &new_external_docids)?; + index.main.merge_internal_docids(writer, &new_internal_docids)?; Ok(()) } diff --git a/meilisearch-core/src/update/documents_deletion.rs b/meilisearch-core/src/update/documents_deletion.rs index cba19ed1a..c990260db 100644 --- a/meilisearch-core/src/update/documents_deletion.rs +++ b/meilisearch-core/src/update/documents_deletion.rs @@ -31,7 +31,7 @@ impl DocumentsDeletion { } } - pub fn delete_document_by_user_id(&mut self, document_id: String) { + pub fn delete_document_by_external_docid(&mut self, document_id: String) { self.documents.push(document_id); } @@ -73,19 +73,19 @@ pub fn apply_documents_deletion( deletion: Vec, ) -> MResult<()> { - let (user_ids, internal_ids) = { - let new_user_ids = SetBuf::from_dirty(deletion); - let mut internal_ids = Vec::new(); + let (external_docids, internal_docids) = { + let new_external_docids = SetBuf::from_dirty(deletion); + let mut internal_docids = Vec::new(); - let user_ids = index.main.user_ids(writer)?; - for userid in new_user_ids.as_slice() { + let user_ids = index.main.external_docids(writer)?; + for userid in new_external_docids.as_slice() { if let Some(id) = user_ids.get(userid) { - internal_ids.push(DocumentId(id as u32)); + internal_docids.push(DocumentId(id as u32)); } } - let new_user_ids = fst::Map::from_iter(new_user_ids.into_iter().map(|k| (k, 0))).unwrap(); - (new_user_ids, SetBuf::from_dirty(internal_ids)) + let new_external_docids = fst::Map::from_iter(new_external_docids.into_iter().map(|k| (k, 0))).unwrap(); + (new_external_docids, SetBuf::from_dirty(internal_docids)) }; let schema = match index.main.schema(writer)? { @@ -100,7 +100,7 @@ pub fn apply_documents_deletion( // facet filters deletion if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? { - let facet_map = facets::facet_map_from_docids(writer, &index, &internal_ids, &attributes_for_facetting)?; + let facet_map = facets::facet_map_from_docids(writer, &index, &internal_docids, &attributes_for_facetting)?; index.facets.remove(writer, facet_map)?; } @@ -108,7 +108,7 @@ pub fn apply_documents_deletion( let ranked_fields = schema.ranked(); let mut words_document_ids = HashMap::new(); - for id in internal_ids.iter().cloned() { + for id in internal_docids.iter().cloned() { // remove all the ranked attributes from the ranked_map for ranked_attr in ranked_fields { ranked_map.remove(id, *ranked_attr); @@ -179,8 +179,8 @@ pub fn apply_documents_deletion( index.main.put_number_of_documents(writer, |old| old - deleted_documents_len)?; // We apply the changes to the user and internal ids - index.main.remove_user_ids(writer, &user_ids)?; - index.main.remove_internal_ids(writer, &internal_ids)?; + index.main.remove_external_docids(writer, &external_docids)?; + index.main.remove_internal_docids(writer, &internal_docids)?; compute_short_prefixes(writer, index)?; diff --git a/meilisearch-http/src/routes/document.rs b/meilisearch-http/src/routes/document.rs index d165b5b39..bd89f7be2 100644 --- a/meilisearch-http/src/routes/document.rs +++ b/meilisearch-http/src/routes/document.rs @@ -44,7 +44,7 @@ async fn get_document( .ok_or(ResponseError::index_not_found(&path.index_uid))?; let reader = data.db.main_read_txn()?; - let internal_id = index.main.user_to_internal_id(&reader, &path.document_id)?; + let internal_id = index.main.external_to_internal_docid(&reader, &path.document_id)?; let internal_id = match internal_id { Some(internal_id) => internal_id, @@ -74,7 +74,7 @@ async fn delete_document( let mut update_writer = data.db.update_write_txn()?; let mut documents_deletion = index.documents_deletion(); - documents_deletion.delete_document_by_user_id(path.document_id.clone()); + documents_deletion.delete_document_by_external_docid(path.document_id.clone()); let update_id = documents_deletion.finalize(&mut update_writer)?; @@ -242,7 +242,7 @@ async fn delete_documents( for document_id in body.into_inner() { let document_id = update::value_to_string(&document_id); - documents_deletion.delete_document_by_user_id(document_id); + documents_deletion.delete_document_by_external_docid(document_id); } let update_id = documents_deletion.finalize(&mut writer)?; From ddeb5745be785eb184c82f5e850965df254b1664 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 20 May 2020 15:21:08 +0200 Subject: [PATCH 8/8] Refactor a little bit --- meilisearch-core/src/store/main.rs | 16 +++++----- .../src/update/documents_addition.rs | 30 ++++++++++++------- .../src/update/documents_deletion.rs | 24 +++++++-------- meilisearch-core/src/update/helpers.rs | 20 ++++++------- meilisearch-http/src/routes/document.rs | 9 ++---- 5 files changed, 52 insertions(+), 47 deletions(-) diff --git a/meilisearch-core/src/store/main.rs b/meilisearch-core/src/store/main.rs index 97fd5c707..d1f7a522c 100644 --- a/meilisearch-core/src/store/main.rs +++ b/meilisearch-core/src/store/main.rs @@ -107,15 +107,15 @@ impl Main { self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, ids.as_fst().as_bytes()) } - pub fn merge_external_docids(self, writer: &mut heed::RwTxn, new_ids: &fst::Map) -> ZResult<()> { + pub fn merge_external_docids(self, writer: &mut heed::RwTxn, new_docids: &fst::Map) -> ZResult<()> { use fst::{Streamer, IntoStreamer}; - // Do an union of the old and the new set of user ids. + // Do an union of the old and the new set of external docids. let external_docids = self.external_docids(writer)?; - let mut op = external_docids.op().add(new_ids.into_stream()).r#union(); + let mut op = external_docids.op().add(new_docids.into_stream()).r#union(); let mut build = fst::MapBuilder::memory(); - while let Some((userid, values)) = op.next() { - build.insert(userid, values[0].value).unwrap(); + while let Some((docid, values)) = op.next() { + build.insert(docid, values[0].value).unwrap(); } let external_docids = build.into_inner().unwrap(); @@ -126,12 +126,12 @@ impl Main { pub fn remove_external_docids(self, writer: &mut heed::RwTxn, ids: &fst::Map) -> ZResult<()> { use fst::{Streamer, IntoStreamer}; - // Do an union of the old and the new set of user ids. + // Do an union of the old and the new set of external docids. let external_docids = self.external_docids(writer)?; let mut op = external_docids.op().add(ids.into_stream()).difference(); let mut build = fst::MapBuilder::memory(); - while let Some((userid, values)) = op.next() { - build.insert(userid, values[0].value).unwrap(); + while let Some((docid, values)) = op.next() { + build.insert(docid, values[0].value).unwrap(); } let external_docids = build.into_inner().unwrap(); diff --git a/meilisearch-core/src/update/documents_addition.rs b/meilisearch-core/src/update/documents_addition.rs index c63ce5b2a..6858f6d2c 100644 --- a/meilisearch-core/src/update/documents_addition.rs +++ b/meilisearch-core/src/update/documents_addition.rs @@ -148,11 +148,8 @@ pub fn apply_addition<'a, 'b>( index: &store::Index, new_documents: Vec>, partial: bool -) -> MResult<()> { - let mut documents_additions = HashMap::new(); - let mut new_external_docids = BTreeMap::new(); - let mut new_internal_docids = Vec::with_capacity(new_documents.len()); - +) -> MResult<()> +{ let mut schema = match index.main.schema(writer)? { Some(schema) => schema, None => return Err(Error::SchemaMissing), @@ -166,14 +163,25 @@ pub fn apply_addition<'a, 'b>( let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?; // 1. store documents ids for future deletion + let mut documents_additions = HashMap::new(); + let mut new_external_docids = BTreeMap::new(); + let mut new_internal_docids = Vec::with_capacity(new_documents.len()); + for mut document in new_documents { - let (document_id, userid) = extract_document_id(&primary_key, &document, &external_docids, &mut available_ids)?; - new_external_docids.insert(userid, document_id.0); - new_internal_docids.push(document_id); + let (internal_docid, external_docid) = + extract_document_id( + &primary_key, + &document, + &external_docids, + &mut available_ids, + )?; + + new_external_docids.insert(external_docid, internal_docid.0); + new_internal_docids.push(internal_docid); if partial { let mut deserializer = Deserializer { - document_id, + document_id: internal_docid, reader: writer, documents_fields: index.documents_fields, schema: &schema, @@ -187,7 +195,7 @@ pub fn apply_addition<'a, 'b>( } } } - documents_additions.insert(document_id, document); + documents_additions.insert(internal_docid, document); } // 2. remove the documents postings lists @@ -242,7 +250,7 @@ pub fn apply_addition<'a, 'b>( index.main.put_schema(writer, &schema)?; - let new_external_docids = fst::Map::from_iter(new_external_docids.iter().map(|(u, i)| (u, *i as u64)))?; + let new_external_docids = fst::Map::from_iter(new_external_docids.iter().map(|(ext, id)| (ext, *id as u64)))?; let new_internal_docids = sdset::SetBuf::from_dirty(new_internal_docids); index.main.merge_external_docids(writer, &new_external_docids)?; index.main.merge_internal_docids(writer, &new_internal_docids)?; diff --git a/meilisearch-core/src/update/documents_deletion.rs b/meilisearch-core/src/update/documents_deletion.rs index c990260db..a45021cae 100644 --- a/meilisearch-core/src/update/documents_deletion.rs +++ b/meilisearch-core/src/update/documents_deletion.rs @@ -14,7 +14,7 @@ pub struct DocumentsDeletion { updates_store: store::Updates, updates_results_store: store::UpdatesResults, updates_notifier: UpdateEventsEmitter, - documents: Vec, + external_docids: Vec, } impl DocumentsDeletion { @@ -27,12 +27,12 @@ impl DocumentsDeletion { updates_store, updates_results_store, updates_notifier, - documents: Vec::new(), + external_docids: Vec::new(), } } pub fn delete_document_by_external_docid(&mut self, document_id: String) { - self.documents.push(document_id); + self.external_docids.push(document_id); } pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult { @@ -41,7 +41,7 @@ impl DocumentsDeletion { writer, self.updates_store, self.updates_results_store, - self.documents, + self.external_docids, )?; Ok(update_id) } @@ -49,7 +49,7 @@ impl DocumentsDeletion { impl Extend for DocumentsDeletion { fn extend>(&mut self, iter: T) { - self.documents.extend(iter) + self.external_docids.extend(iter) } } @@ -57,11 +57,11 @@ pub fn push_documents_deletion( writer: &mut heed::RwTxn, updates_store: store::Updates, updates_results_store: store::UpdatesResults, - deletion: Vec, + external_docids: Vec, ) -> MResult { let last_update_id = next_update_id(writer, updates_store, updates_results_store)?; - let update = Update::documents_deletion(deletion); + let update = Update::documents_deletion(external_docids); updates_store.put_update(writer, last_update_id, &update)?; Ok(last_update_id) @@ -70,16 +70,16 @@ pub fn push_documents_deletion( pub fn apply_documents_deletion( writer: &mut heed::RwTxn, index: &store::Index, - deletion: Vec, + external_docids: Vec, ) -> MResult<()> { let (external_docids, internal_docids) = { - let new_external_docids = SetBuf::from_dirty(deletion); + let new_external_docids = SetBuf::from_dirty(external_docids); let mut internal_docids = Vec::new(); - let user_ids = index.main.external_docids(writer)?; - for userid in new_external_docids.as_slice() { - if let Some(id) = user_ids.get(userid) { + let old_external_docids = index.main.external_docids(writer)?; + for external_docid in new_external_docids.as_slice() { + if let Some(id) = old_external_docids.get(external_docid) { internal_docids.push(DocumentId(id as u32)); } } diff --git a/meilisearch-core/src/update/helpers.rs b/meilisearch-core/src/update/helpers.rs index d7207e54c..c7bd05cec 100644 --- a/meilisearch-core/src/update/helpers.rs +++ b/meilisearch-core/src/update/helpers.rs @@ -98,16 +98,16 @@ pub fn value_to_number(value: &Value) -> Option { /// Validates a string representation to be a correct document id and returns /// the corresponding id or generate a new one, this is the way we produce documents ids. pub fn discover_document_id( - userid: &str, - user_ids: &fst::Map, - available_ids: &mut DiscoverIds<'_>, + docid: &str, + external_docids: &fst::Map, + available_docids: &mut DiscoverIds<'_>, ) -> Result { - if userid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') { - match user_ids.get(userid) { + if docid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') { + match external_docids.get(docid) { Some(id) => Ok(DocumentId(id as u32)), None => { - let internal_id = available_ids.next().expect("no more ids available"); + let internal_id = available_docids.next().expect("no more ids available"); Ok(internal_id) }, } @@ -120,18 +120,18 @@ pub fn discover_document_id( pub fn extract_document_id( primary_key: &str, document: &IndexMap, - user_ids: &fst::Map, - available_ids: &mut DiscoverIds<'_>, + external_docids: &fst::Map, + available_docids: &mut DiscoverIds<'_>, ) -> Result<(DocumentId, String), SerializerError> { match document.get(primary_key) { Some(value) => { - let userid = match value { + let docid = match value { Value::Number(number) => number.to_string(), Value::String(string) => string.clone(), _ => return Err(SerializerError::InvalidDocumentIdFormat), }; - discover_document_id(&userid, user_ids, available_ids).map(|id| (id, userid)) + discover_document_id(&docid, external_docids, available_docids).map(|id| (id, docid)) } None => Err(SerializerError::DocumentIdNotFound), } diff --git a/meilisearch-http/src/routes/document.rs b/meilisearch-http/src/routes/document.rs index bd89f7be2..eca88a590 100644 --- a/meilisearch-http/src/routes/document.rs +++ b/meilisearch-http/src/routes/document.rs @@ -44,12 +44,9 @@ async fn get_document( .ok_or(ResponseError::index_not_found(&path.index_uid))?; let reader = data.db.main_read_txn()?; - let internal_id = index.main.external_to_internal_docid(&reader, &path.document_id)?; - - let internal_id = match internal_id { - Some(internal_id) => internal_id, - None => return Err(ResponseError::document_not_found(&path.document_id)), - }; + let internal_id = index.main + .external_to_internal_docid(&reader, &path.document_id)? + .ok_or(ResponseError::document_not_found(&path.document_id))?; let response: Document = index .document(&reader, None, internal_id)?