Rename user ids into external docids

This commit is contained in:
Kerollmops 2020-05-20 14:49:41 +02:00
parent 7bbb101555
commit a60e3fb1cb
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
5 changed files with 63 additions and 63 deletions

View File

@ -18,8 +18,9 @@ const ATTRIBUTES_FOR_FACETING_KEY: &str = "attributes-for-faceting";
const CREATED_AT_KEY: &str = "created-at"; const CREATED_AT_KEY: &str = "created-at";
const CUSTOMS_KEY: &str = "customs"; const CUSTOMS_KEY: &str = "customs";
const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute"; const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute";
const EXTERNAL_DOCIDS_KEY: &str = "external-docids";
const FIELDS_FREQUENCY_KEY: &str = "fields-frequency"; const FIELDS_FREQUENCY_KEY: &str = "fields-frequency";
const INTERNAL_IDS_KEY: &str = "internal-ids"; const INTERNAL_DOCIDS_KEY: &str = "internal-docids";
const NAME_KEY: &str = "name"; const NAME_KEY: &str = "name";
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents"; const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
const RANKED_MAP_KEY: &str = "ranked-map"; const RANKED_MAP_KEY: &str = "ranked-map";
@ -28,7 +29,6 @@ const SCHEMA_KEY: &str = "schema";
const STOP_WORDS_KEY: &str = "stop-words"; const STOP_WORDS_KEY: &str = "stop-words";
const SYNONYMS_KEY: &str = "synonyms"; const SYNONYMS_KEY: &str = "synonyms";
const UPDATED_AT_KEY: &str = "updated-at"; const UPDATED_AT_KEY: &str = "updated-at";
const USER_IDS_KEY: &str = "user-ids";
const WORDS_KEY: &str = "words"; const WORDS_KEY: &str = "words";
pub type FreqsMap = HashMap<String, usize>; pub type FreqsMap = HashMap<String, usize>;
@ -74,73 +74,73 @@ impl Main {
self.main.get::<_, Str, SerdeDatetime>(reader, UPDATED_AT_KEY) self.main.get::<_, Str, SerdeDatetime>(reader, UPDATED_AT_KEY)
} }
pub fn put_internal_ids(self, writer: &mut heed::RwTxn<MainT>, ids: &sdset::Set<DocumentId>) -> ZResult<()> { pub fn put_internal_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &sdset::Set<DocumentId>) -> ZResult<()> {
self.main.put::<_, Str, DocumentsIds>(writer, INTERNAL_IDS_KEY, ids) self.main.put::<_, Str, DocumentsIds>(writer, INTERNAL_DOCIDS_KEY, ids)
} }
pub fn internal_ids<'txn>(self, reader: &'txn heed::RoTxn<MainT>) -> ZResult<Cow<'txn, sdset::Set<DocumentId>>> { pub fn internal_docids<'txn>(self, reader: &'txn heed::RoTxn<MainT>) -> ZResult<Cow<'txn, sdset::Set<DocumentId>>> {
match self.main.get::<_, Str, DocumentsIds>(reader, INTERNAL_IDS_KEY)? { match self.main.get::<_, Str, DocumentsIds>(reader, INTERNAL_DOCIDS_KEY)? {
Some(ids) => Ok(ids), Some(ids) => Ok(ids),
None => Ok(Cow::default()), None => Ok(Cow::default()),
} }
} }
pub fn merge_internal_ids(self, writer: &mut heed::RwTxn<MainT>, new_ids: &sdset::Set<DocumentId>) -> ZResult<()> { pub fn merge_internal_docids(self, writer: &mut heed::RwTxn<MainT>, new_ids: &sdset::Set<DocumentId>) -> ZResult<()> {
use sdset::SetOperation; use sdset::SetOperation;
// We do an union of the old and new internal ids. // We do a union of the old and new internal docids.
let internal_ids = self.internal_ids(writer)?; let internal_docids = self.internal_docids(writer)?;
let internal_ids = sdset::duo::Union::new(&internal_ids, new_ids).into_set_buf(); let internal_docids = sdset::duo::Union::new(&internal_docids, new_ids).into_set_buf();
self.put_internal_ids(writer, &internal_ids) self.put_internal_docids(writer, &internal_docids)
} }
pub fn remove_internal_ids(self, writer: &mut heed::RwTxn<MainT>, ids: &sdset::Set<DocumentId>) -> ZResult<()> { pub fn remove_internal_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &sdset::Set<DocumentId>) -> ZResult<()> {
use sdset::SetOperation; use sdset::SetOperation;
// We do a difference of the old and new internal ids. // We do a difference of the old and new internal ids.
let internal_ids = self.internal_ids(writer)?; let internal_docids = self.internal_docids(writer)?;
let internal_ids = sdset::duo::Difference::new(&internal_ids, ids).into_set_buf(); let internal_docids = sdset::duo::Difference::new(&internal_docids, ids).into_set_buf();
self.put_internal_ids(writer, &internal_ids) self.put_internal_docids(writer, &internal_docids)
} }
pub fn put_user_ids(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map) -> ZResult<()> { pub fn put_external_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map) -> ZResult<()> {
self.main.put::<_, Str, ByteSlice>(writer, USER_IDS_KEY, ids.as_fst().as_bytes()) self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, ids.as_fst().as_bytes())
} }
pub fn merge_user_ids(self, writer: &mut heed::RwTxn<MainT>, new_ids: &fst::Map) -> ZResult<()> { pub fn merge_external_docids(self, writer: &mut heed::RwTxn<MainT>, new_ids: &fst::Map) -> ZResult<()> {
use fst::{Streamer, IntoStreamer}; use fst::{Streamer, IntoStreamer};
// Do an union of the old and the new set of user ids. // Do a union of the old and the new set of external docids.
let user_ids = self.user_ids(writer)?; let external_docids = self.external_docids(writer)?;
let mut op = user_ids.op().add(new_ids.into_stream()).r#union(); let mut op = external_docids.op().add(new_ids.into_stream()).r#union();
let mut build = fst::MapBuilder::memory(); let mut build = fst::MapBuilder::memory();
while let Some((userid, values)) = op.next() { while let Some((userid, values)) = op.next() {
build.insert(userid, values[0].value).unwrap(); build.insert(userid, values[0].value).unwrap();
} }
let user_ids = build.into_inner().unwrap(); let external_docids = build.into_inner().unwrap();
// TODO prefer using self.put_user_ids // TODO prefer using self.put_external_docids
self.main.put::<_, Str, ByteSlice>(writer, USER_IDS_KEY, user_ids.as_slice()) self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, external_docids.as_slice())
} }
pub fn remove_user_ids(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map) -> ZResult<()> { pub fn remove_external_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map) -> ZResult<()> {
use fst::{Streamer, IntoStreamer}; use fst::{Streamer, IntoStreamer};
// Do an union of the old and the new set of user ids. // Do a difference: remove the given external docids from the old set.
let user_ids = self.user_ids(writer)?; let external_docids = self.external_docids(writer)?;
let mut op = user_ids.op().add(ids.into_stream()).difference(); let mut op = external_docids.op().add(ids.into_stream()).difference();
let mut build = fst::MapBuilder::memory(); let mut build = fst::MapBuilder::memory();
while let Some((userid, values)) = op.next() { while let Some((userid, values)) = op.next() {
build.insert(userid, values[0].value).unwrap(); build.insert(userid, values[0].value).unwrap();
} }
let user_ids = build.into_inner().unwrap(); let external_docids = build.into_inner().unwrap();
// TODO prefer using self.put_user_ids // TODO prefer using self.put_external_docids
self.main.put::<_, Str, ByteSlice>(writer, USER_IDS_KEY, user_ids.as_slice()) self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, external_docids.as_slice())
} }
pub fn user_ids(self, reader: &heed::RoTxn<MainT>) -> ZResult<fst::Map> { pub fn external_docids(self, reader: &heed::RoTxn<MainT>) -> ZResult<fst::Map> {
match self.main.get::<_, Str, ByteSlice>(reader, USER_IDS_KEY)? { match self.main.get::<_, Str, ByteSlice>(reader, EXTERNAL_DOCIDS_KEY)? {
Some(bytes) => { Some(bytes) => {
let len = bytes.len(); let len = bytes.len();
let bytes = Arc::new(bytes.to_owned()); let bytes = Arc::new(bytes.to_owned());
@ -151,9 +151,9 @@ impl Main {
} }
} }
pub fn user_to_internal_id(self, reader: &heed::RoTxn<MainT>, userid: &str) -> ZResult<Option<DocumentId>> { pub fn external_to_internal_docid(self, reader: &heed::RoTxn<MainT>, external_docid: &str) -> ZResult<Option<DocumentId>> {
let user_ids = self.user_ids(reader)?; let external_ids = self.external_docids(reader)?;
Ok(user_ids.get(userid).map(|id| DocumentId(id as u32))) Ok(external_ids.get(external_docid).map(|id| DocumentId(id as u32)))
} }
pub fn put_words_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> { pub fn put_words_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> {

View File

@ -7,8 +7,8 @@ pub fn apply_clear_all(
index: &store::Index, index: &store::Index,
) -> MResult<()> { ) -> MResult<()> {
index.main.put_words_fst(writer, &fst::Set::default())?; index.main.put_words_fst(writer, &fst::Set::default())?;
index.main.put_user_ids(writer, &fst::Map::default())?; index.main.put_external_docids(writer, &fst::Map::default())?;
index.main.put_internal_ids(writer, &sdset::SetBuf::default())?; index.main.put_internal_docids(writer, &sdset::SetBuf::default())?;
index.main.put_ranked_map(writer, &RankedMap::default())?; index.main.put_ranked_map(writer, &RankedMap::default())?;
index.main.put_number_of_documents(writer, |_| 0)?; index.main.put_number_of_documents(writer, |_| 0)?;
index.documents_fields.clear(writer)?; index.documents_fields.clear(writer)?;

View File

@ -150,8 +150,8 @@ pub fn apply_addition<'a, 'b>(
partial: bool partial: bool
) -> MResult<()> { ) -> MResult<()> {
let mut documents_additions = HashMap::new(); let mut documents_additions = HashMap::new();
let mut new_user_ids = BTreeMap::new(); let mut new_external_docids = BTreeMap::new();
let mut new_internal_ids = Vec::with_capacity(new_documents.len()); let mut new_internal_docids = Vec::with_capacity(new_documents.len());
let mut schema = match index.main.schema(writer)? { let mut schema = match index.main.schema(writer)? {
Some(schema) => schema, Some(schema) => schema,
@ -159,17 +159,17 @@ pub fn apply_addition<'a, 'b>(
}; };
// Retrieve the documents ids related structures // Retrieve the documents ids related structures
let user_ids = index.main.user_ids(writer)?; let external_docids = index.main.external_docids(writer)?;
let internal_ids = index.main.internal_ids(writer)?; let internal_docids = index.main.internal_docids(writer)?;
let mut available_ids = DiscoverIds::new(&internal_ids); let mut available_ids = DiscoverIds::new(&internal_docids);
let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?; let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?;
// 1. store documents ids for future deletion // 1. store documents ids for future deletion
for mut document in new_documents { for mut document in new_documents {
let (document_id, userid) = extract_document_id(&primary_key, &document, &user_ids, &mut available_ids)?; let (document_id, userid) = extract_document_id(&primary_key, &document, &external_docids, &mut available_ids)?;
new_user_ids.insert(userid, document_id.0); new_external_docids.insert(userid, document_id.0);
new_internal_ids.push(document_id); new_internal_docids.push(document_id);
if partial { if partial {
let mut deserializer = Deserializer { let mut deserializer = Deserializer {
@ -192,7 +192,7 @@ pub fn apply_addition<'a, 'b>(
// 2. remove the documents postings lists // 2. remove the documents postings lists
let number_of_inserted_documents = documents_additions.len(); let number_of_inserted_documents = documents_additions.len();
let documents_ids = new_user_ids.iter().map(|(userid, _)| userid.clone()).collect(); let documents_ids = new_external_docids.iter().map(|(id, _)| id.clone()).collect();
apply_documents_deletion(writer, index, documents_ids)?; apply_documents_deletion(writer, index, documents_ids)?;
let mut ranked_map = match index.main.ranked_map(writer)? { let mut ranked_map = match index.main.ranked_map(writer)? {
@ -242,10 +242,10 @@ pub fn apply_addition<'a, 'b>(
index.main.put_schema(writer, &schema)?; index.main.put_schema(writer, &schema)?;
let new_user_ids = fst::Map::from_iter(new_user_ids.iter().map(|(u, i)| (u, *i as u64)))?; let new_external_docids = fst::Map::from_iter(new_external_docids.iter().map(|(u, i)| (u, *i as u64)))?;
let new_internal_ids = sdset::SetBuf::from_dirty(new_internal_ids); let new_internal_docids = sdset::SetBuf::from_dirty(new_internal_docids);
index.main.merge_user_ids(writer, &new_user_ids)?; index.main.merge_external_docids(writer, &new_external_docids)?;
index.main.merge_internal_ids(writer, &new_internal_ids)?; index.main.merge_internal_docids(writer, &new_internal_docids)?;
Ok(()) Ok(())
} }

View File

@ -31,7 +31,7 @@ impl DocumentsDeletion {
} }
} }
pub fn delete_document_by_user_id(&mut self, document_id: String) { pub fn delete_document_by_external_docid(&mut self, document_id: String) {
self.documents.push(document_id); self.documents.push(document_id);
} }
@ -73,19 +73,19 @@ pub fn apply_documents_deletion(
deletion: Vec<String>, deletion: Vec<String>,
) -> MResult<()> ) -> MResult<()>
{ {
let (user_ids, internal_ids) = { let (external_docids, internal_docids) = {
let new_user_ids = SetBuf::from_dirty(deletion); let new_external_docids = SetBuf::from_dirty(deletion);
let mut internal_ids = Vec::new(); let mut internal_docids = Vec::new();
let user_ids = index.main.user_ids(writer)?; let user_ids = index.main.external_docids(writer)?;
for userid in new_user_ids.as_slice() { for userid in new_external_docids.as_slice() {
if let Some(id) = user_ids.get(userid) { if let Some(id) = user_ids.get(userid) {
internal_ids.push(DocumentId(id as u32)); internal_docids.push(DocumentId(id as u32));
} }
} }
let new_user_ids = fst::Map::from_iter(new_user_ids.into_iter().map(|k| (k, 0))).unwrap(); let new_external_docids = fst::Map::from_iter(new_external_docids.into_iter().map(|k| (k, 0))).unwrap();
(new_user_ids, SetBuf::from_dirty(internal_ids)) (new_external_docids, SetBuf::from_dirty(internal_docids))
}; };
let schema = match index.main.schema(writer)? { let schema = match index.main.schema(writer)? {
@ -100,7 +100,7 @@ pub fn apply_documents_deletion(
// facet filters deletion // facet filters deletion
if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? { if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
let facet_map = facets::facet_map_from_docids(writer, &index, &internal_ids, &attributes_for_facetting)?; let facet_map = facets::facet_map_from_docids(writer, &index, &internal_docids, &attributes_for_facetting)?;
index.facets.remove(writer, facet_map)?; index.facets.remove(writer, facet_map)?;
} }
@ -108,7 +108,7 @@ pub fn apply_documents_deletion(
let ranked_fields = schema.ranked(); let ranked_fields = schema.ranked();
let mut words_document_ids = HashMap::new(); let mut words_document_ids = HashMap::new();
for id in internal_ids.iter().cloned() { for id in internal_docids.iter().cloned() {
// remove all the ranked attributes from the ranked_map // remove all the ranked attributes from the ranked_map
for ranked_attr in ranked_fields { for ranked_attr in ranked_fields {
ranked_map.remove(id, *ranked_attr); ranked_map.remove(id, *ranked_attr);
@ -179,8 +179,8 @@ pub fn apply_documents_deletion(
index.main.put_number_of_documents(writer, |old| old - deleted_documents_len)?; index.main.put_number_of_documents(writer, |old| old - deleted_documents_len)?;
// We apply the changes to the user and internal ids // We apply the changes to the external and internal docids
index.main.remove_user_ids(writer, &user_ids)?; index.main.remove_external_docids(writer, &external_docids)?;
index.main.remove_internal_ids(writer, &internal_ids)?; index.main.remove_internal_docids(writer, &internal_docids)?;
compute_short_prefixes(writer, index)?; compute_short_prefixes(writer, index)?;

View File

@ -44,7 +44,7 @@ async fn get_document(
.ok_or(ResponseError::index_not_found(&path.index_uid))?; .ok_or(ResponseError::index_not_found(&path.index_uid))?;
let reader = data.db.main_read_txn()?; let reader = data.db.main_read_txn()?;
let internal_id = index.main.user_to_internal_id(&reader, &path.document_id)?; let internal_id = index.main.external_to_internal_docid(&reader, &path.document_id)?;
let internal_id = match internal_id { let internal_id = match internal_id {
Some(internal_id) => internal_id, Some(internal_id) => internal_id,
@ -74,7 +74,7 @@ async fn delete_document(
let mut update_writer = data.db.update_write_txn()?; let mut update_writer = data.db.update_write_txn()?;
let mut documents_deletion = index.documents_deletion(); let mut documents_deletion = index.documents_deletion();
documents_deletion.delete_document_by_user_id(path.document_id.clone()); documents_deletion.delete_document_by_external_docid(path.document_id.clone());
let update_id = documents_deletion.finalize(&mut update_writer)?; let update_id = documents_deletion.finalize(&mut update_writer)?;
@ -242,7 +242,7 @@ async fn delete_documents(
for document_id in body.into_inner() { for document_id in body.into_inner() {
let document_id = update::value_to_string(&document_id); let document_id = update::value_to_string(&document_id);
documents_deletion.delete_document_by_user_id(document_id); documents_deletion.delete_document_by_external_docid(document_id);
} }
let update_id = documents_deletion.finalize(&mut writer)?; let update_id = documents_deletion.finalize(&mut writer)?;