mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-23 03:37:28 +01:00
Refactor a little bit
This commit is contained in:
parent
a60e3fb1cb
commit
ddeb5745be
@ -107,15 +107,15 @@ impl Main {
|
||||
self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, ids.as_fst().as_bytes())
|
||||
}
|
||||
|
||||
pub fn merge_external_docids(self, writer: &mut heed::RwTxn<MainT>, new_ids: &fst::Map) -> ZResult<()> {
|
||||
pub fn merge_external_docids(self, writer: &mut heed::RwTxn<MainT>, new_docids: &fst::Map) -> ZResult<()> {
|
||||
use fst::{Streamer, IntoStreamer};
|
||||
|
||||
// Do an union of the old and the new set of user ids.
|
||||
// Do a union of the old and the new set of external docids.
|
||||
let external_docids = self.external_docids(writer)?;
|
||||
let mut op = external_docids.op().add(new_ids.into_stream()).r#union();
|
||||
let mut op = external_docids.op().add(new_docids.into_stream()).r#union();
|
||||
let mut build = fst::MapBuilder::memory();
|
||||
while let Some((userid, values)) = op.next() {
|
||||
build.insert(userid, values[0].value).unwrap();
|
||||
while let Some((docid, values)) = op.next() {
|
||||
build.insert(docid, values[0].value).unwrap();
|
||||
}
|
||||
let external_docids = build.into_inner().unwrap();
|
||||
|
||||
@ -126,12 +126,12 @@ impl Main {
|
||||
pub fn remove_external_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map) -> ZResult<()> {
|
||||
use fst::{Streamer, IntoStreamer};
|
||||
|
||||
// Do an union of the old and the new set of user ids.
|
||||
// Do a difference: keep the old external docids that are not in the set of ids to remove.
|
||||
let external_docids = self.external_docids(writer)?;
|
||||
let mut op = external_docids.op().add(ids.into_stream()).difference();
|
||||
let mut build = fst::MapBuilder::memory();
|
||||
while let Some((userid, values)) = op.next() {
|
||||
build.insert(userid, values[0].value).unwrap();
|
||||
while let Some((docid, values)) = op.next() {
|
||||
build.insert(docid, values[0].value).unwrap();
|
||||
}
|
||||
let external_docids = build.into_inner().unwrap();
|
||||
|
||||
|
@ -148,11 +148,8 @@ pub fn apply_addition<'a, 'b>(
|
||||
index: &store::Index,
|
||||
new_documents: Vec<IndexMap<String, Value>>,
|
||||
partial: bool
|
||||
) -> MResult<()> {
|
||||
let mut documents_additions = HashMap::new();
|
||||
let mut new_external_docids = BTreeMap::new();
|
||||
let mut new_internal_docids = Vec::with_capacity(new_documents.len());
|
||||
|
||||
) -> MResult<()>
|
||||
{
|
||||
let mut schema = match index.main.schema(writer)? {
|
||||
Some(schema) => schema,
|
||||
None => return Err(Error::SchemaMissing),
|
||||
@ -166,14 +163,25 @@ pub fn apply_addition<'a, 'b>(
|
||||
let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?;
|
||||
|
||||
// 1. store documents ids for future deletion
|
||||
let mut documents_additions = HashMap::new();
|
||||
let mut new_external_docids = BTreeMap::new();
|
||||
let mut new_internal_docids = Vec::with_capacity(new_documents.len());
|
||||
|
||||
for mut document in new_documents {
|
||||
let (document_id, userid) = extract_document_id(&primary_key, &document, &external_docids, &mut available_ids)?;
|
||||
new_external_docids.insert(userid, document_id.0);
|
||||
new_internal_docids.push(document_id);
|
||||
let (internal_docid, external_docid) =
|
||||
extract_document_id(
|
||||
&primary_key,
|
||||
&document,
|
||||
&external_docids,
|
||||
&mut available_ids,
|
||||
)?;
|
||||
|
||||
new_external_docids.insert(external_docid, internal_docid.0);
|
||||
new_internal_docids.push(internal_docid);
|
||||
|
||||
if partial {
|
||||
let mut deserializer = Deserializer {
|
||||
document_id,
|
||||
document_id: internal_docid,
|
||||
reader: writer,
|
||||
documents_fields: index.documents_fields,
|
||||
schema: &schema,
|
||||
@ -187,7 +195,7 @@ pub fn apply_addition<'a, 'b>(
|
||||
}
|
||||
}
|
||||
}
|
||||
documents_additions.insert(document_id, document);
|
||||
documents_additions.insert(internal_docid, document);
|
||||
}
|
||||
|
||||
// 2. remove the documents postings lists
|
||||
@ -242,7 +250,7 @@ pub fn apply_addition<'a, 'b>(
|
||||
|
||||
index.main.put_schema(writer, &schema)?;
|
||||
|
||||
let new_external_docids = fst::Map::from_iter(new_external_docids.iter().map(|(u, i)| (u, *i as u64)))?;
|
||||
let new_external_docids = fst::Map::from_iter(new_external_docids.iter().map(|(ext, id)| (ext, *id as u64)))?;
|
||||
let new_internal_docids = sdset::SetBuf::from_dirty(new_internal_docids);
|
||||
index.main.merge_external_docids(writer, &new_external_docids)?;
|
||||
index.main.merge_internal_docids(writer, &new_internal_docids)?;
|
||||
|
@ -14,7 +14,7 @@ pub struct DocumentsDeletion {
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
documents: Vec<String>,
|
||||
external_docids: Vec<String>,
|
||||
}
|
||||
|
||||
impl DocumentsDeletion {
|
||||
@ -27,12 +27,12 @@ impl DocumentsDeletion {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
documents: Vec::new(),
|
||||
external_docids: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete_document_by_external_docid(&mut self, document_id: String) {
|
||||
self.documents.push(document_id);
|
||||
self.external_docids.push(document_id);
|
||||
}
|
||||
|
||||
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
|
||||
@ -41,7 +41,7 @@ impl DocumentsDeletion {
|
||||
writer,
|
||||
self.updates_store,
|
||||
self.updates_results_store,
|
||||
self.documents,
|
||||
self.external_docids,
|
||||
)?;
|
||||
Ok(update_id)
|
||||
}
|
||||
@ -49,7 +49,7 @@ impl DocumentsDeletion {
|
||||
|
||||
impl Extend<String> for DocumentsDeletion {
|
||||
fn extend<T: IntoIterator<Item=String>>(&mut self, iter: T) {
|
||||
self.documents.extend(iter)
|
||||
self.external_docids.extend(iter)
|
||||
}
|
||||
}
|
||||
|
||||
@ -57,11 +57,11 @@ pub fn push_documents_deletion(
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
deletion: Vec<String>,
|
||||
external_docids: Vec<String>,
|
||||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::documents_deletion(deletion);
|
||||
let update = Update::documents_deletion(external_docids);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
@ -70,16 +70,16 @@ pub fn push_documents_deletion(
|
||||
pub fn apply_documents_deletion(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
deletion: Vec<String>,
|
||||
external_docids: Vec<String>,
|
||||
) -> MResult<()>
|
||||
{
|
||||
let (external_docids, internal_docids) = {
|
||||
let new_external_docids = SetBuf::from_dirty(deletion);
|
||||
let new_external_docids = SetBuf::from_dirty(external_docids);
|
||||
let mut internal_docids = Vec::new();
|
||||
|
||||
let user_ids = index.main.external_docids(writer)?;
|
||||
for userid in new_external_docids.as_slice() {
|
||||
if let Some(id) = user_ids.get(userid) {
|
||||
let old_external_docids = index.main.external_docids(writer)?;
|
||||
for external_docid in new_external_docids.as_slice() {
|
||||
if let Some(id) = old_external_docids.get(external_docid) {
|
||||
internal_docids.push(DocumentId(id as u32));
|
||||
}
|
||||
}
|
||||
|
@ -98,16 +98,16 @@ pub fn value_to_number(value: &Value) -> Option<Number> {
|
||||
/// Validates that a string representation is a correct document id and returns
|
||||
/// the corresponding id, or generates a new one; this is how we produce document ids.
|
||||
pub fn discover_document_id(
|
||||
userid: &str,
|
||||
user_ids: &fst::Map,
|
||||
available_ids: &mut DiscoverIds<'_>,
|
||||
docid: &str,
|
||||
external_docids: &fst::Map,
|
||||
available_docids: &mut DiscoverIds<'_>,
|
||||
) -> Result<DocumentId, SerializerError>
|
||||
{
|
||||
if userid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
|
||||
match user_ids.get(userid) {
|
||||
if docid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
|
||||
match external_docids.get(docid) {
|
||||
Some(id) => Ok(DocumentId(id as u32)),
|
||||
None => {
|
||||
let internal_id = available_ids.next().expect("no more ids available");
|
||||
let internal_id = available_docids.next().expect("no more ids available");
|
||||
Ok(internal_id)
|
||||
},
|
||||
}
|
||||
@ -120,18 +120,18 @@ pub fn discover_document_id(
|
||||
pub fn extract_document_id(
|
||||
primary_key: &str,
|
||||
document: &IndexMap<String, Value>,
|
||||
user_ids: &fst::Map,
|
||||
available_ids: &mut DiscoverIds<'_>,
|
||||
external_docids: &fst::Map,
|
||||
available_docids: &mut DiscoverIds<'_>,
|
||||
) -> Result<(DocumentId, String), SerializerError>
|
||||
{
|
||||
match document.get(primary_key) {
|
||||
Some(value) => {
|
||||
let userid = match value {
|
||||
let docid = match value {
|
||||
Value::Number(number) => number.to_string(),
|
||||
Value::String(string) => string.clone(),
|
||||
_ => return Err(SerializerError::InvalidDocumentIdFormat),
|
||||
};
|
||||
discover_document_id(&userid, user_ids, available_ids).map(|id| (id, userid))
|
||||
discover_document_id(&docid, external_docids, available_docids).map(|id| (id, docid))
|
||||
}
|
||||
None => Err(SerializerError::DocumentIdNotFound),
|
||||
}
|
||||
|
@ -44,12 +44,9 @@ async fn get_document(
|
||||
.ok_or(ResponseError::index_not_found(&path.index_uid))?;
|
||||
|
||||
let reader = data.db.main_read_txn()?;
|
||||
let internal_id = index.main.external_to_internal_docid(&reader, &path.document_id)?;
|
||||
|
||||
let internal_id = match internal_id {
|
||||
Some(internal_id) => internal_id,
|
||||
None => return Err(ResponseError::document_not_found(&path.document_id)),
|
||||
};
|
||||
let internal_id = index.main
|
||||
.external_to_internal_docid(&reader, &path.document_id)?
|
||||
.ok_or(ResponseError::document_not_found(&path.document_id))?;
|
||||
|
||||
let response: Document = index
|
||||
.document(&reader, None, internal_id)?
|
||||
|
Loading…
x
Reference in New Issue
Block a user