mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-25 06:00:08 +01:00
Refactor a little bit
This commit is contained in:
parent
a60e3fb1cb
commit
ddeb5745be
@ -107,15 +107,15 @@ impl Main {
|
|||||||
self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, ids.as_fst().as_bytes())
|
self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, ids.as_fst().as_bytes())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn merge_external_docids(self, writer: &mut heed::RwTxn<MainT>, new_ids: &fst::Map) -> ZResult<()> {
|
pub fn merge_external_docids(self, writer: &mut heed::RwTxn<MainT>, new_docids: &fst::Map) -> ZResult<()> {
|
||||||
use fst::{Streamer, IntoStreamer};
|
use fst::{Streamer, IntoStreamer};
|
||||||
|
|
||||||
// Do an union of the old and the new set of user ids.
|
// Do an union of the old and the new set of external docids.
|
||||||
let external_docids = self.external_docids(writer)?;
|
let external_docids = self.external_docids(writer)?;
|
||||||
let mut op = external_docids.op().add(new_ids.into_stream()).r#union();
|
let mut op = external_docids.op().add(new_docids.into_stream()).r#union();
|
||||||
let mut build = fst::MapBuilder::memory();
|
let mut build = fst::MapBuilder::memory();
|
||||||
while let Some((userid, values)) = op.next() {
|
while let Some((docid, values)) = op.next() {
|
||||||
build.insert(userid, values[0].value).unwrap();
|
build.insert(docid, values[0].value).unwrap();
|
||||||
}
|
}
|
||||||
let external_docids = build.into_inner().unwrap();
|
let external_docids = build.into_inner().unwrap();
|
||||||
|
|
||||||
@ -126,12 +126,12 @@ impl Main {
|
|||||||
pub fn remove_external_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map) -> ZResult<()> {
|
pub fn remove_external_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map) -> ZResult<()> {
|
||||||
use fst::{Streamer, IntoStreamer};
|
use fst::{Streamer, IntoStreamer};
|
||||||
|
|
||||||
// Do an union of the old and the new set of user ids.
|
// Do an union of the old and the new set of external docids.
|
||||||
let external_docids = self.external_docids(writer)?;
|
let external_docids = self.external_docids(writer)?;
|
||||||
let mut op = external_docids.op().add(ids.into_stream()).difference();
|
let mut op = external_docids.op().add(ids.into_stream()).difference();
|
||||||
let mut build = fst::MapBuilder::memory();
|
let mut build = fst::MapBuilder::memory();
|
||||||
while let Some((userid, values)) = op.next() {
|
while let Some((docid, values)) = op.next() {
|
||||||
build.insert(userid, values[0].value).unwrap();
|
build.insert(docid, values[0].value).unwrap();
|
||||||
}
|
}
|
||||||
let external_docids = build.into_inner().unwrap();
|
let external_docids = build.into_inner().unwrap();
|
||||||
|
|
||||||
|
@ -148,11 +148,8 @@ pub fn apply_addition<'a, 'b>(
|
|||||||
index: &store::Index,
|
index: &store::Index,
|
||||||
new_documents: Vec<IndexMap<String, Value>>,
|
new_documents: Vec<IndexMap<String, Value>>,
|
||||||
partial: bool
|
partial: bool
|
||||||
) -> MResult<()> {
|
) -> MResult<()>
|
||||||
let mut documents_additions = HashMap::new();
|
{
|
||||||
let mut new_external_docids = BTreeMap::new();
|
|
||||||
let mut new_internal_docids = Vec::with_capacity(new_documents.len());
|
|
||||||
|
|
||||||
let mut schema = match index.main.schema(writer)? {
|
let mut schema = match index.main.schema(writer)? {
|
||||||
Some(schema) => schema,
|
Some(schema) => schema,
|
||||||
None => return Err(Error::SchemaMissing),
|
None => return Err(Error::SchemaMissing),
|
||||||
@ -166,14 +163,25 @@ pub fn apply_addition<'a, 'b>(
|
|||||||
let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?;
|
let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?;
|
||||||
|
|
||||||
// 1. store documents ids for future deletion
|
// 1. store documents ids for future deletion
|
||||||
|
let mut documents_additions = HashMap::new();
|
||||||
|
let mut new_external_docids = BTreeMap::new();
|
||||||
|
let mut new_internal_docids = Vec::with_capacity(new_documents.len());
|
||||||
|
|
||||||
for mut document in new_documents {
|
for mut document in new_documents {
|
||||||
let (document_id, userid) = extract_document_id(&primary_key, &document, &external_docids, &mut available_ids)?;
|
let (internal_docid, external_docid) =
|
||||||
new_external_docids.insert(userid, document_id.0);
|
extract_document_id(
|
||||||
new_internal_docids.push(document_id);
|
&primary_key,
|
||||||
|
&document,
|
||||||
|
&external_docids,
|
||||||
|
&mut available_ids,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
new_external_docids.insert(external_docid, internal_docid.0);
|
||||||
|
new_internal_docids.push(internal_docid);
|
||||||
|
|
||||||
if partial {
|
if partial {
|
||||||
let mut deserializer = Deserializer {
|
let mut deserializer = Deserializer {
|
||||||
document_id,
|
document_id: internal_docid,
|
||||||
reader: writer,
|
reader: writer,
|
||||||
documents_fields: index.documents_fields,
|
documents_fields: index.documents_fields,
|
||||||
schema: &schema,
|
schema: &schema,
|
||||||
@ -187,7 +195,7 @@ pub fn apply_addition<'a, 'b>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
documents_additions.insert(document_id, document);
|
documents_additions.insert(internal_docid, document);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. remove the documents postings lists
|
// 2. remove the documents postings lists
|
||||||
@ -242,7 +250,7 @@ pub fn apply_addition<'a, 'b>(
|
|||||||
|
|
||||||
index.main.put_schema(writer, &schema)?;
|
index.main.put_schema(writer, &schema)?;
|
||||||
|
|
||||||
let new_external_docids = fst::Map::from_iter(new_external_docids.iter().map(|(u, i)| (u, *i as u64)))?;
|
let new_external_docids = fst::Map::from_iter(new_external_docids.iter().map(|(ext, id)| (ext, *id as u64)))?;
|
||||||
let new_internal_docids = sdset::SetBuf::from_dirty(new_internal_docids);
|
let new_internal_docids = sdset::SetBuf::from_dirty(new_internal_docids);
|
||||||
index.main.merge_external_docids(writer, &new_external_docids)?;
|
index.main.merge_external_docids(writer, &new_external_docids)?;
|
||||||
index.main.merge_internal_docids(writer, &new_internal_docids)?;
|
index.main.merge_internal_docids(writer, &new_internal_docids)?;
|
||||||
|
@ -14,7 +14,7 @@ pub struct DocumentsDeletion {
|
|||||||
updates_store: store::Updates,
|
updates_store: store::Updates,
|
||||||
updates_results_store: store::UpdatesResults,
|
updates_results_store: store::UpdatesResults,
|
||||||
updates_notifier: UpdateEventsEmitter,
|
updates_notifier: UpdateEventsEmitter,
|
||||||
documents: Vec<String>,
|
external_docids: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DocumentsDeletion {
|
impl DocumentsDeletion {
|
||||||
@ -27,12 +27,12 @@ impl DocumentsDeletion {
|
|||||||
updates_store,
|
updates_store,
|
||||||
updates_results_store,
|
updates_results_store,
|
||||||
updates_notifier,
|
updates_notifier,
|
||||||
documents: Vec::new(),
|
external_docids: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn delete_document_by_external_docid(&mut self, document_id: String) {
|
pub fn delete_document_by_external_docid(&mut self, document_id: String) {
|
||||||
self.documents.push(document_id);
|
self.external_docids.push(document_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
|
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
|
||||||
@ -41,7 +41,7 @@ impl DocumentsDeletion {
|
|||||||
writer,
|
writer,
|
||||||
self.updates_store,
|
self.updates_store,
|
||||||
self.updates_results_store,
|
self.updates_results_store,
|
||||||
self.documents,
|
self.external_docids,
|
||||||
)?;
|
)?;
|
||||||
Ok(update_id)
|
Ok(update_id)
|
||||||
}
|
}
|
||||||
@ -49,7 +49,7 @@ impl DocumentsDeletion {
|
|||||||
|
|
||||||
impl Extend<String> for DocumentsDeletion {
|
impl Extend<String> for DocumentsDeletion {
|
||||||
fn extend<T: IntoIterator<Item=String>>(&mut self, iter: T) {
|
fn extend<T: IntoIterator<Item=String>>(&mut self, iter: T) {
|
||||||
self.documents.extend(iter)
|
self.external_docids.extend(iter)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -57,11 +57,11 @@ pub fn push_documents_deletion(
|
|||||||
writer: &mut heed::RwTxn<UpdateT>,
|
writer: &mut heed::RwTxn<UpdateT>,
|
||||||
updates_store: store::Updates,
|
updates_store: store::Updates,
|
||||||
updates_results_store: store::UpdatesResults,
|
updates_results_store: store::UpdatesResults,
|
||||||
deletion: Vec<String>,
|
external_docids: Vec<String>,
|
||||||
) -> MResult<u64> {
|
) -> MResult<u64> {
|
||||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||||
|
|
||||||
let update = Update::documents_deletion(deletion);
|
let update = Update::documents_deletion(external_docids);
|
||||||
updates_store.put_update(writer, last_update_id, &update)?;
|
updates_store.put_update(writer, last_update_id, &update)?;
|
||||||
|
|
||||||
Ok(last_update_id)
|
Ok(last_update_id)
|
||||||
@ -70,16 +70,16 @@ pub fn push_documents_deletion(
|
|||||||
pub fn apply_documents_deletion(
|
pub fn apply_documents_deletion(
|
||||||
writer: &mut heed::RwTxn<MainT>,
|
writer: &mut heed::RwTxn<MainT>,
|
||||||
index: &store::Index,
|
index: &store::Index,
|
||||||
deletion: Vec<String>,
|
external_docids: Vec<String>,
|
||||||
) -> MResult<()>
|
) -> MResult<()>
|
||||||
{
|
{
|
||||||
let (external_docids, internal_docids) = {
|
let (external_docids, internal_docids) = {
|
||||||
let new_external_docids = SetBuf::from_dirty(deletion);
|
let new_external_docids = SetBuf::from_dirty(external_docids);
|
||||||
let mut internal_docids = Vec::new();
|
let mut internal_docids = Vec::new();
|
||||||
|
|
||||||
let user_ids = index.main.external_docids(writer)?;
|
let old_external_docids = index.main.external_docids(writer)?;
|
||||||
for userid in new_external_docids.as_slice() {
|
for external_docid in new_external_docids.as_slice() {
|
||||||
if let Some(id) = user_ids.get(userid) {
|
if let Some(id) = old_external_docids.get(external_docid) {
|
||||||
internal_docids.push(DocumentId(id as u32));
|
internal_docids.push(DocumentId(id as u32));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -98,16 +98,16 @@ pub fn value_to_number(value: &Value) -> Option<Number> {
|
|||||||
/// Validates a string representation to be a correct document id and returns
|
/// Validates a string representation to be a correct document id and returns
|
||||||
/// the corresponding id or generate a new one, this is the way we produce documents ids.
|
/// the corresponding id or generate a new one, this is the way we produce documents ids.
|
||||||
pub fn discover_document_id(
|
pub fn discover_document_id(
|
||||||
userid: &str,
|
docid: &str,
|
||||||
user_ids: &fst::Map,
|
external_docids: &fst::Map,
|
||||||
available_ids: &mut DiscoverIds<'_>,
|
available_docids: &mut DiscoverIds<'_>,
|
||||||
) -> Result<DocumentId, SerializerError>
|
) -> Result<DocumentId, SerializerError>
|
||||||
{
|
{
|
||||||
if userid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
|
if docid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
|
||||||
match user_ids.get(userid) {
|
match external_docids.get(docid) {
|
||||||
Some(id) => Ok(DocumentId(id as u32)),
|
Some(id) => Ok(DocumentId(id as u32)),
|
||||||
None => {
|
None => {
|
||||||
let internal_id = available_ids.next().expect("no more ids available");
|
let internal_id = available_docids.next().expect("no more ids available");
|
||||||
Ok(internal_id)
|
Ok(internal_id)
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@ -120,18 +120,18 @@ pub fn discover_document_id(
|
|||||||
pub fn extract_document_id(
|
pub fn extract_document_id(
|
||||||
primary_key: &str,
|
primary_key: &str,
|
||||||
document: &IndexMap<String, Value>,
|
document: &IndexMap<String, Value>,
|
||||||
user_ids: &fst::Map,
|
external_docids: &fst::Map,
|
||||||
available_ids: &mut DiscoverIds<'_>,
|
available_docids: &mut DiscoverIds<'_>,
|
||||||
) -> Result<(DocumentId, String), SerializerError>
|
) -> Result<(DocumentId, String), SerializerError>
|
||||||
{
|
{
|
||||||
match document.get(primary_key) {
|
match document.get(primary_key) {
|
||||||
Some(value) => {
|
Some(value) => {
|
||||||
let userid = match value {
|
let docid = match value {
|
||||||
Value::Number(number) => number.to_string(),
|
Value::Number(number) => number.to_string(),
|
||||||
Value::String(string) => string.clone(),
|
Value::String(string) => string.clone(),
|
||||||
_ => return Err(SerializerError::InvalidDocumentIdFormat),
|
_ => return Err(SerializerError::InvalidDocumentIdFormat),
|
||||||
};
|
};
|
||||||
discover_document_id(&userid, user_ids, available_ids).map(|id| (id, userid))
|
discover_document_id(&docid, external_docids, available_docids).map(|id| (id, docid))
|
||||||
}
|
}
|
||||||
None => Err(SerializerError::DocumentIdNotFound),
|
None => Err(SerializerError::DocumentIdNotFound),
|
||||||
}
|
}
|
||||||
|
@ -44,12 +44,9 @@ async fn get_document(
|
|||||||
.ok_or(ResponseError::index_not_found(&path.index_uid))?;
|
.ok_or(ResponseError::index_not_found(&path.index_uid))?;
|
||||||
|
|
||||||
let reader = data.db.main_read_txn()?;
|
let reader = data.db.main_read_txn()?;
|
||||||
let internal_id = index.main.external_to_internal_docid(&reader, &path.document_id)?;
|
let internal_id = index.main
|
||||||
|
.external_to_internal_docid(&reader, &path.document_id)?
|
||||||
let internal_id = match internal_id {
|
.ok_or(ResponseError::document_not_found(&path.document_id))?;
|
||||||
Some(internal_id) => internal_id,
|
|
||||||
None => return Err(ResponseError::document_not_found(&path.document_id)),
|
|
||||||
};
|
|
||||||
|
|
||||||
let response: Document = index
|
let response: Document = index
|
||||||
.document(&reader, None, internal_id)?
|
.document(&reader, None, internal_id)?
|
||||||
|
Loading…
x
Reference in New Issue
Block a user