Refactor a little bit

This commit is contained in:
Kerollmops 2020-05-20 15:21:08 +02:00
parent a60e3fb1cb
commit ddeb5745be
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
5 changed files with 52 additions and 47 deletions

View File

@ -107,15 +107,15 @@ impl Main {
self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, ids.as_fst().as_bytes()) self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, ids.as_fst().as_bytes())
} }
pub fn merge_external_docids(self, writer: &mut heed::RwTxn<MainT>, new_ids: &fst::Map) -> ZResult<()> { pub fn merge_external_docids(self, writer: &mut heed::RwTxn<MainT>, new_docids: &fst::Map) -> ZResult<()> {
use fst::{Streamer, IntoStreamer}; use fst::{Streamer, IntoStreamer};
// Do an union of the old and the new set of user ids. // Do a union of the old and the new set of external docids.
let external_docids = self.external_docids(writer)?; let external_docids = self.external_docids(writer)?;
let mut op = external_docids.op().add(new_ids.into_stream()).r#union(); let mut op = external_docids.op().add(new_docids.into_stream()).r#union();
let mut build = fst::MapBuilder::memory(); let mut build = fst::MapBuilder::memory();
while let Some((userid, values)) = op.next() { while let Some((docid, values)) = op.next() {
build.insert(userid, values[0].value).unwrap(); build.insert(docid, values[0].value).unwrap();
} }
let external_docids = build.into_inner().unwrap(); let external_docids = build.into_inner().unwrap();
@ -126,12 +126,12 @@ impl Main {
pub fn remove_external_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map) -> ZResult<()> { pub fn remove_external_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map) -> ZResult<()> {
use fst::{Streamer, IntoStreamer}; use fst::{Streamer, IntoStreamer};
// Do an union of the old and the new set of user ids. // Compute the difference: remove the given external docids from the stored set.
let external_docids = self.external_docids(writer)?; let external_docids = self.external_docids(writer)?;
let mut op = external_docids.op().add(ids.into_stream()).difference(); let mut op = external_docids.op().add(ids.into_stream()).difference();
let mut build = fst::MapBuilder::memory(); let mut build = fst::MapBuilder::memory();
while let Some((userid, values)) = op.next() { while let Some((docid, values)) = op.next() {
build.insert(userid, values[0].value).unwrap(); build.insert(docid, values[0].value).unwrap();
} }
let external_docids = build.into_inner().unwrap(); let external_docids = build.into_inner().unwrap();

View File

@ -148,11 +148,8 @@ pub fn apply_addition<'a, 'b>(
index: &store::Index, index: &store::Index,
new_documents: Vec<IndexMap<String, Value>>, new_documents: Vec<IndexMap<String, Value>>,
partial: bool partial: bool
) -> MResult<()> { ) -> MResult<()>
let mut documents_additions = HashMap::new(); {
let mut new_external_docids = BTreeMap::new();
let mut new_internal_docids = Vec::with_capacity(new_documents.len());
let mut schema = match index.main.schema(writer)? { let mut schema = match index.main.schema(writer)? {
Some(schema) => schema, Some(schema) => schema,
None => return Err(Error::SchemaMissing), None => return Err(Error::SchemaMissing),
@ -166,14 +163,25 @@ pub fn apply_addition<'a, 'b>(
let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?; let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?;
// 1. store documents ids for future deletion // 1. store documents ids for future deletion
let mut documents_additions = HashMap::new();
let mut new_external_docids = BTreeMap::new();
let mut new_internal_docids = Vec::with_capacity(new_documents.len());
for mut document in new_documents { for mut document in new_documents {
let (document_id, userid) = extract_document_id(&primary_key, &document, &external_docids, &mut available_ids)?; let (internal_docid, external_docid) =
new_external_docids.insert(userid, document_id.0); extract_document_id(
new_internal_docids.push(document_id); &primary_key,
&document,
&external_docids,
&mut available_ids,
)?;
new_external_docids.insert(external_docid, internal_docid.0);
new_internal_docids.push(internal_docid);
if partial { if partial {
let mut deserializer = Deserializer { let mut deserializer = Deserializer {
document_id, document_id: internal_docid,
reader: writer, reader: writer,
documents_fields: index.documents_fields, documents_fields: index.documents_fields,
schema: &schema, schema: &schema,
@ -187,7 +195,7 @@ pub fn apply_addition<'a, 'b>(
} }
} }
} }
documents_additions.insert(document_id, document); documents_additions.insert(internal_docid, document);
} }
// 2. remove the documents postings lists // 2. remove the documents postings lists
@ -242,7 +250,7 @@ pub fn apply_addition<'a, 'b>(
index.main.put_schema(writer, &schema)?; index.main.put_schema(writer, &schema)?;
let new_external_docids = fst::Map::from_iter(new_external_docids.iter().map(|(u, i)| (u, *i as u64)))?; let new_external_docids = fst::Map::from_iter(new_external_docids.iter().map(|(ext, id)| (ext, *id as u64)))?;
let new_internal_docids = sdset::SetBuf::from_dirty(new_internal_docids); let new_internal_docids = sdset::SetBuf::from_dirty(new_internal_docids);
index.main.merge_external_docids(writer, &new_external_docids)?; index.main.merge_external_docids(writer, &new_external_docids)?;
index.main.merge_internal_docids(writer, &new_internal_docids)?; index.main.merge_internal_docids(writer, &new_internal_docids)?;

View File

@ -14,7 +14,7 @@ pub struct DocumentsDeletion {
updates_store: store::Updates, updates_store: store::Updates,
updates_results_store: store::UpdatesResults, updates_results_store: store::UpdatesResults,
updates_notifier: UpdateEventsEmitter, updates_notifier: UpdateEventsEmitter,
documents: Vec<String>, external_docids: Vec<String>,
} }
impl DocumentsDeletion { impl DocumentsDeletion {
@ -27,12 +27,12 @@ impl DocumentsDeletion {
updates_store, updates_store,
updates_results_store, updates_results_store,
updates_notifier, updates_notifier,
documents: Vec::new(), external_docids: Vec::new(),
} }
} }
pub fn delete_document_by_external_docid(&mut self, document_id: String) { pub fn delete_document_by_external_docid(&mut self, document_id: String) {
self.documents.push(document_id); self.external_docids.push(document_id);
} }
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> { pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
@ -41,7 +41,7 @@ impl DocumentsDeletion {
writer, writer,
self.updates_store, self.updates_store,
self.updates_results_store, self.updates_results_store,
self.documents, self.external_docids,
)?; )?;
Ok(update_id) Ok(update_id)
} }
@ -49,7 +49,7 @@ impl DocumentsDeletion {
impl Extend<String> for DocumentsDeletion { impl Extend<String> for DocumentsDeletion {
fn extend<T: IntoIterator<Item=String>>(&mut self, iter: T) { fn extend<T: IntoIterator<Item=String>>(&mut self, iter: T) {
self.documents.extend(iter) self.external_docids.extend(iter)
} }
} }
@ -57,11 +57,11 @@ pub fn push_documents_deletion(
writer: &mut heed::RwTxn<UpdateT>, writer: &mut heed::RwTxn<UpdateT>,
updates_store: store::Updates, updates_store: store::Updates,
updates_results_store: store::UpdatesResults, updates_results_store: store::UpdatesResults,
deletion: Vec<String>, external_docids: Vec<String>,
) -> MResult<u64> { ) -> MResult<u64> {
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?; let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
let update = Update::documents_deletion(deletion); let update = Update::documents_deletion(external_docids);
updates_store.put_update(writer, last_update_id, &update)?; updates_store.put_update(writer, last_update_id, &update)?;
Ok(last_update_id) Ok(last_update_id)
@ -70,16 +70,16 @@ pub fn push_documents_deletion(
pub fn apply_documents_deletion( pub fn apply_documents_deletion(
writer: &mut heed::RwTxn<MainT>, writer: &mut heed::RwTxn<MainT>,
index: &store::Index, index: &store::Index,
deletion: Vec<String>, external_docids: Vec<String>,
) -> MResult<()> ) -> MResult<()>
{ {
let (external_docids, internal_docids) = { let (external_docids, internal_docids) = {
let new_external_docids = SetBuf::from_dirty(deletion); let new_external_docids = SetBuf::from_dirty(external_docids);
let mut internal_docids = Vec::new(); let mut internal_docids = Vec::new();
let user_ids = index.main.external_docids(writer)?; let old_external_docids = index.main.external_docids(writer)?;
for userid in new_external_docids.as_slice() { for external_docid in new_external_docids.as_slice() {
if let Some(id) = user_ids.get(userid) { if let Some(id) = old_external_docids.get(external_docid) {
internal_docids.push(DocumentId(id as u32)); internal_docids.push(DocumentId(id as u32));
} }
} }

View File

@ -98,16 +98,16 @@ pub fn value_to_number(value: &Value) -> Option<Number> {
/// Validates that a string representation is a correct document id and returns /// Validates that a string representation is a correct document id and returns
/// the corresponding id, or generates a new one; this is how we produce document ids. /// the corresponding id, or generates a new one; this is how we produce document ids.
pub fn discover_document_id( pub fn discover_document_id(
userid: &str, docid: &str,
user_ids: &fst::Map, external_docids: &fst::Map,
available_ids: &mut DiscoverIds<'_>, available_docids: &mut DiscoverIds<'_>,
) -> Result<DocumentId, SerializerError> ) -> Result<DocumentId, SerializerError>
{ {
if userid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') { if docid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
match user_ids.get(userid) { match external_docids.get(docid) {
Some(id) => Ok(DocumentId(id as u32)), Some(id) => Ok(DocumentId(id as u32)),
None => { None => {
let internal_id = available_ids.next().expect("no more ids available"); let internal_id = available_docids.next().expect("no more ids available");
Ok(internal_id) Ok(internal_id)
}, },
} }
@ -120,18 +120,18 @@ pub fn discover_document_id(
pub fn extract_document_id( pub fn extract_document_id(
primary_key: &str, primary_key: &str,
document: &IndexMap<String, Value>, document: &IndexMap<String, Value>,
user_ids: &fst::Map, external_docids: &fst::Map,
available_ids: &mut DiscoverIds<'_>, available_docids: &mut DiscoverIds<'_>,
) -> Result<(DocumentId, String), SerializerError> ) -> Result<(DocumentId, String), SerializerError>
{ {
match document.get(primary_key) { match document.get(primary_key) {
Some(value) => { Some(value) => {
let userid = match value { let docid = match value {
Value::Number(number) => number.to_string(), Value::Number(number) => number.to_string(),
Value::String(string) => string.clone(), Value::String(string) => string.clone(),
_ => return Err(SerializerError::InvalidDocumentIdFormat), _ => return Err(SerializerError::InvalidDocumentIdFormat),
}; };
discover_document_id(&userid, user_ids, available_ids).map(|id| (id, userid)) discover_document_id(&docid, external_docids, available_docids).map(|id| (id, docid))
} }
None => Err(SerializerError::DocumentIdNotFound), None => Err(SerializerError::DocumentIdNotFound),
} }

View File

@ -44,12 +44,9 @@ async fn get_document(
.ok_or(ResponseError::index_not_found(&path.index_uid))?; .ok_or(ResponseError::index_not_found(&path.index_uid))?;
let reader = data.db.main_read_txn()?; let reader = data.db.main_read_txn()?;
let internal_id = index.main.external_to_internal_docid(&reader, &path.document_id)?; let internal_id = index.main
.external_to_internal_docid(&reader, &path.document_id)?
let internal_id = match internal_id { .ok_or(ResponseError::document_not_found(&path.document_id))?;
Some(internal_id) => internal_id,
None => return Err(ResponseError::document_not_found(&path.document_id)),
};
let response: Document = index let response: Document = index
.document(&reader, None, internal_id)? .document(&reader, None, internal_id)?