Refactor a little bit

This commit is contained in:
Kerollmops 2020-05-20 15:21:08 +02:00
parent a60e3fb1cb
commit ddeb5745be
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
5 changed files with 52 additions and 47 deletions

View File

@ -107,15 +107,15 @@ impl Main {
self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, ids.as_fst().as_bytes())
}
pub fn merge_external_docids(self, writer: &mut heed::RwTxn<MainT>, new_ids: &fst::Map) -> ZResult<()> {
pub fn merge_external_docids(self, writer: &mut heed::RwTxn<MainT>, new_docids: &fst::Map) -> ZResult<()> {
use fst::{Streamer, IntoStreamer};
// Do an union of the old and the new set of user ids.
// Do an union of the old and the new set of external docids.
let external_docids = self.external_docids(writer)?;
let mut op = external_docids.op().add(new_ids.into_stream()).r#union();
let mut op = external_docids.op().add(new_docids.into_stream()).r#union();
let mut build = fst::MapBuilder::memory();
while let Some((userid, values)) = op.next() {
build.insert(userid, values[0].value).unwrap();
while let Some((docid, values)) = op.next() {
build.insert(docid, values[0].value).unwrap();
}
let external_docids = build.into_inner().unwrap();
@ -126,12 +126,12 @@ impl Main {
pub fn remove_external_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map) -> ZResult<()> {
use fst::{Streamer, IntoStreamer};
// Do an union of the old and the new set of user ids.
// Do an union of the old and the new set of external docids.
let external_docids = self.external_docids(writer)?;
let mut op = external_docids.op().add(ids.into_stream()).difference();
let mut build = fst::MapBuilder::memory();
while let Some((userid, values)) = op.next() {
build.insert(userid, values[0].value).unwrap();
while let Some((docid, values)) = op.next() {
build.insert(docid, values[0].value).unwrap();
}
let external_docids = build.into_inner().unwrap();

View File

@ -148,11 +148,8 @@ pub fn apply_addition<'a, 'b>(
index: &store::Index,
new_documents: Vec<IndexMap<String, Value>>,
partial: bool
) -> MResult<()> {
let mut documents_additions = HashMap::new();
let mut new_external_docids = BTreeMap::new();
let mut new_internal_docids = Vec::with_capacity(new_documents.len());
) -> MResult<()>
{
let mut schema = match index.main.schema(writer)? {
Some(schema) => schema,
None => return Err(Error::SchemaMissing),
@ -166,14 +163,25 @@ pub fn apply_addition<'a, 'b>(
let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?;
// 1. store documents ids for future deletion
let mut documents_additions = HashMap::new();
let mut new_external_docids = BTreeMap::new();
let mut new_internal_docids = Vec::with_capacity(new_documents.len());
for mut document in new_documents {
let (document_id, userid) = extract_document_id(&primary_key, &document, &external_docids, &mut available_ids)?;
new_external_docids.insert(userid, document_id.0);
new_internal_docids.push(document_id);
let (internal_docid, external_docid) =
extract_document_id(
&primary_key,
&document,
&external_docids,
&mut available_ids,
)?;
new_external_docids.insert(external_docid, internal_docid.0);
new_internal_docids.push(internal_docid);
if partial {
let mut deserializer = Deserializer {
document_id,
document_id: internal_docid,
reader: writer,
documents_fields: index.documents_fields,
schema: &schema,
@ -187,7 +195,7 @@ pub fn apply_addition<'a, 'b>(
}
}
}
documents_additions.insert(document_id, document);
documents_additions.insert(internal_docid, document);
}
// 2. remove the documents postings lists
@ -242,7 +250,7 @@ pub fn apply_addition<'a, 'b>(
index.main.put_schema(writer, &schema)?;
let new_external_docids = fst::Map::from_iter(new_external_docids.iter().map(|(u, i)| (u, *i as u64)))?;
let new_external_docids = fst::Map::from_iter(new_external_docids.iter().map(|(ext, id)| (ext, *id as u64)))?;
let new_internal_docids = sdset::SetBuf::from_dirty(new_internal_docids);
index.main.merge_external_docids(writer, &new_external_docids)?;
index.main.merge_internal_docids(writer, &new_internal_docids)?;

View File

@ -14,7 +14,7 @@ pub struct DocumentsDeletion {
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: UpdateEventsEmitter,
documents: Vec<String>,
external_docids: Vec<String>,
}
impl DocumentsDeletion {
@ -27,12 +27,12 @@ impl DocumentsDeletion {
updates_store,
updates_results_store,
updates_notifier,
documents: Vec::new(),
external_docids: Vec::new(),
}
}
pub fn delete_document_by_external_docid(&mut self, document_id: String) {
self.documents.push(document_id);
self.external_docids.push(document_id);
}
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
@ -41,7 +41,7 @@ impl DocumentsDeletion {
writer,
self.updates_store,
self.updates_results_store,
self.documents,
self.external_docids,
)?;
Ok(update_id)
}
@ -49,7 +49,7 @@ impl DocumentsDeletion {
impl Extend<String> for DocumentsDeletion {
fn extend<T: IntoIterator<Item=String>>(&mut self, iter: T) {
self.documents.extend(iter)
self.external_docids.extend(iter)
}
}
@ -57,11 +57,11 @@ pub fn push_documents_deletion(
writer: &mut heed::RwTxn<UpdateT>,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
deletion: Vec<String>,
external_docids: Vec<String>,
) -> MResult<u64> {
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
let update = Update::documents_deletion(deletion);
let update = Update::documents_deletion(external_docids);
updates_store.put_update(writer, last_update_id, &update)?;
Ok(last_update_id)
@ -70,16 +70,16 @@ pub fn push_documents_deletion(
pub fn apply_documents_deletion(
writer: &mut heed::RwTxn<MainT>,
index: &store::Index,
deletion: Vec<String>,
external_docids: Vec<String>,
) -> MResult<()>
{
let (external_docids, internal_docids) = {
let new_external_docids = SetBuf::from_dirty(deletion);
let new_external_docids = SetBuf::from_dirty(external_docids);
let mut internal_docids = Vec::new();
let user_ids = index.main.external_docids(writer)?;
for userid in new_external_docids.as_slice() {
if let Some(id) = user_ids.get(userid) {
let old_external_docids = index.main.external_docids(writer)?;
for external_docid in new_external_docids.as_slice() {
if let Some(id) = old_external_docids.get(external_docid) {
internal_docids.push(DocumentId(id as u32));
}
}

View File

@ -98,16 +98,16 @@ pub fn value_to_number(value: &Value) -> Option<Number> {
/// Validates a string representation to be a correct document id and returns
/// the corresponding id or generate a new one, this is the way we produce documents ids.
pub fn discover_document_id(
userid: &str,
user_ids: &fst::Map,
available_ids: &mut DiscoverIds<'_>,
docid: &str,
external_docids: &fst::Map,
available_docids: &mut DiscoverIds<'_>,
) -> Result<DocumentId, SerializerError>
{
if userid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
match user_ids.get(userid) {
if docid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
match external_docids.get(docid) {
Some(id) => Ok(DocumentId(id as u32)),
None => {
let internal_id = available_ids.next().expect("no more ids available");
let internal_id = available_docids.next().expect("no more ids available");
Ok(internal_id)
},
}
@ -120,18 +120,18 @@ pub fn discover_document_id(
pub fn extract_document_id(
primary_key: &str,
document: &IndexMap<String, Value>,
user_ids: &fst::Map,
available_ids: &mut DiscoverIds<'_>,
external_docids: &fst::Map,
available_docids: &mut DiscoverIds<'_>,
) -> Result<(DocumentId, String), SerializerError>
{
match document.get(primary_key) {
Some(value) => {
let userid = match value {
let docid = match value {
Value::Number(number) => number.to_string(),
Value::String(string) => string.clone(),
_ => return Err(SerializerError::InvalidDocumentIdFormat),
};
discover_document_id(&userid, user_ids, available_ids).map(|id| (id, userid))
discover_document_id(&docid, external_docids, available_docids).map(|id| (id, docid))
}
None => Err(SerializerError::DocumentIdNotFound),
}

View File

@ -44,12 +44,9 @@ async fn get_document(
.ok_or(ResponseError::index_not_found(&path.index_uid))?;
let reader = data.db.main_read_txn()?;
let internal_id = index.main.external_to_internal_docid(&reader, &path.document_id)?;
let internal_id = match internal_id {
Some(internal_id) => internal_id,
None => return Err(ResponseError::document_not_found(&path.document_id)),
};
let internal_id = index.main
.external_to_internal_docid(&reader, &path.document_id)?
.ok_or(ResponseError::document_not_found(&path.document_id))?;
let response: Document = index
.document(&reader, None, internal_id)?