mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Fix many indexing and searching related bugs
This commit is contained in:
parent
2236ebbd42
commit
d8d0442d63
9 changed files with 99 additions and 28 deletions
|
@ -1,5 +1,5 @@
|
|||
use std::sync::Arc;
|
||||
use rkv::Value;
|
||||
use rkv::{Value, StoreError};
|
||||
use crate::{DocumentId, MResult};
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
|
@ -24,10 +24,14 @@ impl DocsWords {
|
|||
&self,
|
||||
writer: &mut rkv::Writer,
|
||||
document_id: DocumentId,
|
||||
) -> Result<(), rkv::StoreError>
|
||||
) -> Result<bool, rkv::StoreError>
|
||||
{
|
||||
let document_id_bytes = document_id.0.to_be_bytes();
|
||||
self.docs_words.delete(writer, document_id_bytes)
|
||||
match self.docs_words.delete(writer, document_id_bytes) {
|
||||
Ok(()) => Ok(true),
|
||||
Err(StoreError::LmdbError(lmdb::Error::NotFound)) => Ok(false),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn doc_words<T: rkv::Readable>(
|
||||
|
|
|
@ -18,6 +18,20 @@ fn document_attribute_into_key(document_id: DocumentId, attribute: SchemaAttr) -
|
|||
key
|
||||
}
|
||||
|
||||
fn document_attribute_from_key(key: [u8; 10]) -> (DocumentId, SchemaAttr) {
|
||||
let document_id = {
|
||||
let array = TryFrom::try_from(&key[0..8]).unwrap();
|
||||
DocumentId(u64::from_be_bytes(array))
|
||||
};
|
||||
|
||||
let schema_attr = {
|
||||
let array = TryFrom::try_from(&key[8..8+2]).unwrap();
|
||||
SchemaAttr(u16::from_be_bytes(array))
|
||||
};
|
||||
|
||||
(document_id, schema_attr)
|
||||
}
|
||||
|
||||
impl DocumentsFields {
|
||||
pub fn put_document_field(
|
||||
&self,
|
||||
|
@ -45,13 +59,10 @@ impl DocumentsFields {
|
|||
let iter = self.documents_fields.iter_from(writer, document_id_bytes)?;
|
||||
for result in iter {
|
||||
let (key, _) = result?;
|
||||
let current_document_id = {
|
||||
let bytes = key.get(0..8).unwrap();
|
||||
let array = TryFrom::try_from(bytes).unwrap();
|
||||
DocumentId(u64::from_be_bytes(array))
|
||||
};
|
||||
|
||||
let array = TryFrom::try_from(key).unwrap();
|
||||
let (current_document_id, _) = document_attribute_from_key(array);
|
||||
if current_document_id != document_id { break }
|
||||
|
||||
keys_to_delete.push(key.to_owned());
|
||||
}
|
||||
|
||||
|
@ -103,10 +114,10 @@ impl<'r, T: rkv::Readable + 'r> Iterator for DocumentFieldsIter<'r, T> {
|
|||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.iter.next() {
|
||||
Some(Ok((key, Some(rkv::Value::Blob(bytes))))) => {
|
||||
let key_bytes = key.get(8..8+2).unwrap();
|
||||
let array = TryFrom::try_from(key_bytes).unwrap();
|
||||
let attr = u16::from_be_bytes(array);
|
||||
let attr = SchemaAttr::new(attr);
|
||||
let array = TryFrom::try_from(key).unwrap();
|
||||
let (current_document_id, attr) = document_attribute_from_key(array);
|
||||
if current_document_id != self.document_id { return None; }
|
||||
|
||||
Some(Ok((attr, bytes)))
|
||||
},
|
||||
Some(Ok((key, data))) => panic!("{:?}, {:?}", key, data),
|
||||
|
|
|
@ -14,8 +14,11 @@ pub use self::synonyms::Synonyms;
|
|||
pub use self::updates::Updates;
|
||||
pub use self::updates_results::UpdatesResults;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use meilidb_schema::Schema;
|
||||
use crate::{update, query_builder::QueryBuilder, MResult};
|
||||
use serde::de;
|
||||
use crate::{update, query_builder::QueryBuilder, DocumentId, MResult, Error};
|
||||
use crate::serde::Deserializer;
|
||||
|
||||
fn aligned_to(bytes: &[u8], align: usize) -> bool {
|
||||
(bytes as *const _ as *const () as usize) % align == 0
|
||||
|
@ -63,6 +66,34 @@ pub struct Index {
|
|||
}
|
||||
|
||||
impl Index {
|
||||
pub fn document<T: de::DeserializeOwned, R: rkv::Readable>(
|
||||
&self,
|
||||
reader: &R,
|
||||
fields: Option<&HashSet<&str>>,
|
||||
document_id: DocumentId,
|
||||
) -> MResult<Option<T>>
|
||||
{
|
||||
let schema = self.main.schema(reader)?;
|
||||
let schema = schema.ok_or(Error::SchemaMissing)?;
|
||||
|
||||
let fields = match fields {
|
||||
Some(fields) => fields.into_iter().map(|name| schema.attribute(name)).collect(),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let mut deserializer = Deserializer {
|
||||
document_id,
|
||||
reader,
|
||||
documents_fields: self.documents_fields,
|
||||
schema: &schema,
|
||||
fields: fields.as_ref(),
|
||||
};
|
||||
|
||||
// TODO: currently we return an error if all document fields are missing,
|
||||
// returning None would have been better
|
||||
Ok(T::deserialize(&mut deserializer).map(Some)?)
|
||||
}
|
||||
|
||||
pub fn schema_update(&self, mut writer: rkv::Writer, schema: Schema) -> MResult<()> {
|
||||
update::push_schema_update(&mut writer, self.updates, self.updates_results, schema)?;
|
||||
writer.commit()?;
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
use std::borrow::Cow;
|
||||
use std::{mem, ptr};
|
||||
|
||||
use zerocopy::{AsBytes, LayoutVerified};
|
||||
use rkv::StoreError;
|
||||
|
||||
use crate::DocIndex;
|
||||
use crate::store::aligned_to;
|
||||
|
@ -26,9 +28,13 @@ impl PostingsLists {
|
|||
&self,
|
||||
writer: &mut rkv::Writer,
|
||||
word: &[u8],
|
||||
) -> Result<(), rkv::StoreError>
|
||||
) -> Result<bool, rkv::StoreError>
|
||||
{
|
||||
self.postings_lists.delete(writer, word)
|
||||
match self.postings_lists.delete(writer, word) {
|
||||
Ok(()) => Ok(true),
|
||||
Err(StoreError::LmdbError(lmdb::Error::NotFound)) => Ok(false),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn postings_list<'a>(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue