mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 20:07:09 +02:00
Fix some tests but not all of them
This commit is contained in:
parent
670aff5553
commit
aba8a0e9e0
20 changed files with 1211 additions and 881 deletions
|
@ -150,11 +150,24 @@ pub fn objects_from_json_value(json: serde_json::Value) -> Vec<crate::Object> {
|
|||
macro_rules! documents {
|
||||
($data:tt) => {{
|
||||
let documents = serde_json::json!($data);
|
||||
let documents = $crate::documents::objects_from_json_value(documents);
|
||||
$crate::documents::documents_batch_reader_from_objects(documents)
|
||||
let mut file = tempfile::tempfile().unwrap();
|
||||
for document in documents.as_array().unwrap() {
|
||||
serde_json::to_writer(&mut file, &document).unwrap();
|
||||
}
|
||||
file.sync_all().unwrap();
|
||||
unsafe { memmap2::Mmap::map(&file).unwrap() }
|
||||
}};
|
||||
}
|
||||
|
||||
pub fn mmap_from_objects(objects: impl IntoIterator<Item = Object>) -> memmap2::Mmap {
|
||||
let mut writer = tempfile::tempfile().map(std::io::BufWriter::new).unwrap();
|
||||
for object in objects {
|
||||
serde_json::to_writer(&mut writer, &object).unwrap();
|
||||
}
|
||||
let file = writer.into_inner().unwrap();
|
||||
unsafe { memmap2::Mmap::map(&file).unwrap() }
|
||||
}
|
||||
|
||||
pub fn documents_batch_reader_from_objects(
|
||||
objects: impl IntoIterator<Item = Object>,
|
||||
) -> DocumentsBatchReader<std::io::Cursor<Vec<u8>>> {
|
||||
|
@ -224,20 +237,6 @@ mod test {
|
|||
assert!(documents.next_document().unwrap().is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_nested() {
|
||||
let docs_reader = documents!([{
|
||||
"hello": {
|
||||
"toto": ["hello"]
|
||||
}
|
||||
}]);
|
||||
|
||||
let (mut cursor, _) = docs_reader.into_cursor_and_fields_index();
|
||||
let doc = cursor.next_document().unwrap().unwrap();
|
||||
let nested: Value = serde_json::from_slice(doc.get(0).unwrap()).unwrap();
|
||||
assert_eq!(nested, json!({ "toto": ["hello"] }));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn out_of_order_json_fields() {
|
||||
let _documents = documents!([
|
||||
|
|
|
@ -1680,19 +1680,23 @@ pub(crate) mod tests {
|
|||
use std::ops::Deref;
|
||||
|
||||
use big_s::S;
|
||||
use bumpalo::Bump;
|
||||
use heed::{EnvOpenOptions, RwTxn};
|
||||
use maplit::{btreemap, hashset};
|
||||
use memmap2::Mmap;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::documents::DocumentsBatchReader;
|
||||
use crate::error::{Error, InternalError};
|
||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||
use crate::update::new::indexer;
|
||||
use crate::update::{
|
||||
self, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting,
|
||||
Settings,
|
||||
self, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting, Settings,
|
||||
};
|
||||
use crate::vector::settings::{EmbedderSource, EmbeddingSettings};
|
||||
use crate::{db_snap, obkv_to_json, Filter, Index, Search, SearchResult};
|
||||
use crate::vector::EmbeddingConfigs;
|
||||
use crate::{
|
||||
db_snap, obkv_to_json, Filter, Index, Search, SearchResult, ThreadPoolNoAbortBuilder,
|
||||
};
|
||||
|
||||
pub(crate) struct TempIndex {
|
||||
pub inner: Index,
|
||||
|
@ -1725,35 +1729,60 @@ pub(crate) mod tests {
|
|||
pub fn new() -> Self {
|
||||
Self::new_with_map_size(4096 * 2000)
|
||||
}
|
||||
pub fn add_documents_using_wtxn<'t, R>(
|
||||
|
||||
pub fn add_documents_using_wtxn<'t>(
|
||||
&'t self,
|
||||
wtxn: &mut RwTxn<'t>,
|
||||
documents: DocumentsBatchReader<R>,
|
||||
) -> Result<(), crate::error::Error>
|
||||
where
|
||||
R: std::io::Read + std::io::Seek,
|
||||
{
|
||||
let builder = IndexDocuments::new(
|
||||
wtxn,
|
||||
self,
|
||||
&self.indexer_config,
|
||||
self.index_documents_config.clone(),
|
||||
|_| (),
|
||||
|| false,
|
||||
)
|
||||
.unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error?;
|
||||
builder.execute()?;
|
||||
documents: Mmap,
|
||||
) -> Result<(), crate::error::Error> {
|
||||
let local_pool;
|
||||
let indexer_config = &self.indexer_config;
|
||||
let pool = match &indexer_config.thread_pool {
|
||||
Some(pool) => pool,
|
||||
None => {
|
||||
local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
|
||||
&local_pool
|
||||
}
|
||||
};
|
||||
|
||||
let rtxn = self.inner.read_txn()?;
|
||||
let db_fields_ids_map = self.inner.fields_ids_map(&rtxn)?;
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let embedders = EmbeddingConfigs::default();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer.into_changes(
|
||||
&indexer_alloc,
|
||||
&self.inner,
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
)?;
|
||||
|
||||
pool.install(|| {
|
||||
indexer::index(
|
||||
wtxn,
|
||||
&self.inner,
|
||||
indexer_config.grenad_parameters(),
|
||||
&db_fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
embedders,
|
||||
&|| false,
|
||||
&|_| (),
|
||||
)
|
||||
})
|
||||
.unwrap()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
pub fn add_documents<R>(
|
||||
&self,
|
||||
documents: DocumentsBatchReader<R>,
|
||||
) -> Result<(), crate::error::Error>
|
||||
where
|
||||
R: std::io::Read + std::io::Seek,
|
||||
{
|
||||
|
||||
pub fn add_documents(&self, documents: Mmap) -> Result<(), crate::error::Error> {
|
||||
let mut wtxn = self.write_txn().unwrap();
|
||||
self.add_documents_using_wtxn(&mut wtxn, documents)?;
|
||||
wtxn.commit().unwrap();
|
||||
|
@ -1769,6 +1798,7 @@ pub(crate) mod tests {
|
|||
wtxn.commit().unwrap();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn update_settings_using_wtxn<'t>(
|
||||
&'t self,
|
||||
wtxn: &mut RwTxn<'t>,
|
||||
|
@ -1784,19 +1814,54 @@ pub(crate) mod tests {
|
|||
&'t self,
|
||||
wtxn: &mut RwTxn<'t>,
|
||||
external_document_ids: Vec<String>,
|
||||
) {
|
||||
let builder = IndexDocuments::new(
|
||||
wtxn,
|
||||
self,
|
||||
&self.indexer_config,
|
||||
self.index_documents_config.clone(),
|
||||
|_| (),
|
||||
|| false,
|
||||
)
|
||||
.unwrap();
|
||||
let (builder, user_error) = builder.remove_documents(external_document_ids).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
) -> Result<(), crate::error::Error> {
|
||||
let local_pool;
|
||||
let indexer_config = &self.indexer_config;
|
||||
let pool = match &indexer_config.thread_pool {
|
||||
Some(pool) => pool,
|
||||
None => {
|
||||
local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
|
||||
&local_pool
|
||||
}
|
||||
};
|
||||
|
||||
let rtxn = self.inner.read_txn()?;
|
||||
let db_fields_ids_map = self.inner.fields_ids_map(&rtxn)?;
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let embedders = EmbeddingConfigs::default();
|
||||
let mut indexer =
|
||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let external_document_ids: Vec<_> =
|
||||
external_document_ids.iter().map(AsRef::as_ref).collect();
|
||||
indexer.delete_documents(external_document_ids.as_slice());
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer.into_changes(
|
||||
&indexer_alloc,
|
||||
&self.inner,
|
||||
&rtxn,
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
)?;
|
||||
|
||||
pool.install(|| {
|
||||
indexer::index(
|
||||
wtxn,
|
||||
&self.inner,
|
||||
indexer_config.grenad_parameters(),
|
||||
&db_fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
embedders,
|
||||
&|| false,
|
||||
&|_| (),
|
||||
)
|
||||
})
|
||||
.unwrap()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn delete_documents(&self, external_document_ids: Vec<String>) {
|
||||
|
@ -1819,29 +1884,55 @@ pub(crate) mod tests {
|
|||
|
||||
let index = TempIndex::new();
|
||||
let mut wtxn = index.inner.write_txn().unwrap();
|
||||
|
||||
let should_abort = AtomicBool::new(false);
|
||||
let builder = IndexDocuments::new(
|
||||
&mut wtxn,
|
||||
&index.inner,
|
||||
&index.indexer_config,
|
||||
index.index_documents_config.clone(),
|
||||
|_| (),
|
||||
|| should_abort.load(Relaxed),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let (builder, user_error) = builder
|
||||
.add_documents(documents!([
|
||||
{ "id": 1, "name": "kevin" },
|
||||
{ "id": 2, "name": "bob", "age": 20 },
|
||||
{ "id": 2, "name": "bob", "age": 20 },
|
||||
]))
|
||||
let local_pool;
|
||||
let indexer_config = &index.indexer_config;
|
||||
let pool = match &indexer_config.thread_pool {
|
||||
Some(pool) => pool,
|
||||
None => {
|
||||
local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
|
||||
&local_pool
|
||||
}
|
||||
};
|
||||
|
||||
let rtxn = index.inner.read_txn().unwrap();
|
||||
let db_fields_ids_map = index.inner.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let embedders = EmbeddingConfigs::default();
|
||||
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let payload = documents!([
|
||||
{ "id": 1, "name": "kevin" },
|
||||
{ "id": 2, "name": "bob", "age": 20 },
|
||||
{ "id": 2, "name": "bob", "age": 20 },
|
||||
]);
|
||||
indexer.add_documents(&payload);
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
.into_changes(&indexer_alloc, &index.inner, &rtxn, None, &mut new_fields_ids_map)
|
||||
.unwrap();
|
||||
user_error.unwrap();
|
||||
|
||||
should_abort.store(true, Relaxed);
|
||||
let err = builder.execute().unwrap_err();
|
||||
|
||||
let err = pool
|
||||
.install(|| {
|
||||
indexer::index(
|
||||
&mut wtxn,
|
||||
&index.inner,
|
||||
indexer_config.grenad_parameters(),
|
||||
&db_fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
embedders,
|
||||
&|| should_abort.load(Relaxed),
|
||||
&|_| (),
|
||||
)
|
||||
})
|
||||
.unwrap()
|
||||
.unwrap_err();
|
||||
|
||||
assert!(matches!(err, Error::InternalError(InternalError::AbortedIndexation)));
|
||||
}
|
||||
|
|
|
@ -407,7 +407,7 @@ mod tests {
|
|||
use big_s::S;
|
||||
use maplit::hashset;
|
||||
|
||||
use crate::documents::documents_batch_reader_from_objects;
|
||||
use crate::documents::mmap_from_objects;
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::{milli_snap, FacetDistribution, OrderBy};
|
||||
|
||||
|
@ -508,8 +508,7 @@ mod tests {
|
|||
documents.push(document);
|
||||
}
|
||||
|
||||
let documents = documents_batch_reader_from_objects(documents);
|
||||
|
||||
let documents = mmap_from_objects(documents);
|
||||
index.add_documents(documents).unwrap();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
@ -594,8 +593,7 @@ mod tests {
|
|||
documents.push(document);
|
||||
}
|
||||
|
||||
let documents = documents_batch_reader_from_objects(documents);
|
||||
|
||||
let documents = mmap_from_objects(documents);
|
||||
index.add_documents(documents).unwrap();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
@ -654,8 +652,7 @@ mod tests {
|
|||
documents.push(document);
|
||||
}
|
||||
|
||||
let documents = documents_batch_reader_from_objects(documents);
|
||||
|
||||
let documents = mmap_from_objects(documents);
|
||||
index.add_documents(documents).unwrap();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
@ -706,8 +703,7 @@ mod tests {
|
|||
documents.push(document);
|
||||
}
|
||||
|
||||
let documents = documents_batch_reader_from_objects(documents);
|
||||
|
||||
let documents = mmap_from_objects(documents);
|
||||
index.add_documents(documents).unwrap();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
@ -758,8 +754,7 @@ mod tests {
|
|||
documents.push(document);
|
||||
}
|
||||
|
||||
let documents = documents_batch_reader_from_objects(documents);
|
||||
|
||||
let documents = mmap_from_objects(documents);
|
||||
index.add_documents(documents).unwrap();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
@ -814,8 +809,7 @@ mod tests {
|
|||
documents.push(document);
|
||||
}
|
||||
|
||||
let documents = documents_batch_reader_from_objects(documents);
|
||||
|
||||
let documents = mmap_from_objects(documents);
|
||||
index.add_documents(documents).unwrap();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
|
|
@ -1,11 +1,16 @@
|
|||
use std::io::Cursor;
|
||||
use std::io::Write;
|
||||
|
||||
use big_s::S;
|
||||
use bumpalo::Bump;
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::{btreemap, hashset};
|
||||
|
||||
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||
use crate::update::new::indexer;
|
||||
use crate::update::{
|
||||
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
|
||||
};
|
||||
use crate::vector::EmbeddingConfigs;
|
||||
use crate::{db_snap, Criterion, Index, Object};
|
||||
pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson");
|
||||
|
||||
|
@ -16,6 +21,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
|||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let config = IndexerConfig::default();
|
||||
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
@ -43,27 +49,41 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
|||
|
||||
// index documents
|
||||
let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false).unwrap();
|
||||
let mut documents_builder = DocumentsBatchBuilder::new(Vec::new());
|
||||
let reader = Cursor::new(CONTENT.as_bytes());
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
|
||||
let object = result.unwrap();
|
||||
documents_builder.append_json_object(&object).unwrap();
|
||||
}
|
||||
let embedders = EmbeddingConfigs::default();
|
||||
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
|
||||
let vector = documents_builder.into_inner().unwrap();
|
||||
let mut file = tempfile::tempfile().unwrap();
|
||||
file.write_all(CONTENT.as_bytes()).unwrap();
|
||||
file.sync_all().unwrap();
|
||||
let payload = unsafe { memmap2::Mmap::map(&file).unwrap() };
|
||||
|
||||
// index documents
|
||||
let content = DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(content).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
indexer.add_documents(&payload).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) =
|
||||
indexer.into_changes(&indexer_alloc, &index, &rtxn, None, &mut new_fields_ids_map).unwrap();
|
||||
|
||||
indexer::index(
|
||||
&mut wtxn,
|
||||
&index,
|
||||
config.grenad_parameters(),
|
||||
&db_fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
embedders,
|
||||
&|| false,
|
||||
&|_| (),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
drop(rtxn);
|
||||
|
||||
index
|
||||
}
|
||||
|
|
|
@ -369,7 +369,7 @@ mod tests {
|
|||
use maplit::hashset;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::documents::documents_batch_reader_from_objects;
|
||||
use crate::documents::{documents_batch_reader_from_objects, mmap_from_objects};
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::heed_codec::StrRefCodec;
|
||||
use crate::index::tests::TempIndex;
|
||||
|
@ -492,8 +492,8 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
let documents = documents_batch_reader_from_objects(documents);
|
||||
index.add_documents(documents).unwrap();
|
||||
let documents = mmap_from_objects(documents);
|
||||
index.add_documents(documents);
|
||||
|
||||
db_snap!(index, facet_id_f64_docids, "initial", @"c34f499261f3510d862fa0283bbe843a");
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,86 +0,0 @@
|
|||
use heed::types::Bytes;
|
||||
use heed::{Database, RoTxn};
|
||||
use obkv::KvReaderU16;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::{all_obkv_to_json, DocumentId, FieldsIdsMap, Object, ObkvCodec, Result, BEU32};
|
||||
|
||||
pub struct ImmutableObkvs<'t> {
|
||||
ids: RoaringBitmap,
|
||||
fields_ids_map: FieldsIdsMap,
|
||||
slices: Vec<&'t [u8]>,
|
||||
}
|
||||
|
||||
impl<'t> ImmutableObkvs<'t> {
|
||||
/// Creates the structure by fetching all the OBKVs
|
||||
/// and keeping the transaction making the pointers valid.
|
||||
pub fn new(
|
||||
rtxn: &'t RoTxn,
|
||||
documents_database: Database<BEU32, ObkvCodec>,
|
||||
fields_ids_map: FieldsIdsMap,
|
||||
subset: RoaringBitmap,
|
||||
) -> heed::Result<Self> {
|
||||
let mut slices = Vec::new();
|
||||
let documents_database = documents_database.remap_data_type::<Bytes>();
|
||||
for docid in &subset {
|
||||
let slice = documents_database.get(rtxn, &docid)?.unwrap();
|
||||
slices.push(slice);
|
||||
}
|
||||
|
||||
Ok(ImmutableObkvs { ids: subset, fields_ids_map, slices })
|
||||
}
|
||||
|
||||
/// Returns the OBKVs identified by the given ID.
|
||||
pub fn obkv(&self, docid: DocumentId) -> heed::Result<Option<&'t KvReaderU16>> {
|
||||
match self
|
||||
.ids
|
||||
.rank(docid)
|
||||
.checked_sub(1)
|
||||
.and_then(|offset| self.slices.get(offset as usize))
|
||||
{
|
||||
Some(&bytes) => Ok(Some(bytes.into())),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the owned rhai::Map identified by the given ID.
|
||||
pub fn rhai_map(&self, docid: DocumentId) -> Result<Option<rhai::Map>> {
|
||||
let obkv = match self.obkv(docid) {
|
||||
Ok(Some(obkv)) => obkv,
|
||||
Ok(None) => return Ok(None),
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
|
||||
let all_keys = obkv.iter().map(|(k, _v)| k).collect::<Vec<_>>();
|
||||
let map: Result<rhai::Map> = all_keys
|
||||
.iter()
|
||||
.copied()
|
||||
.flat_map(|id| obkv.get(id).map(|value| (id, value)))
|
||||
.map(|(id, value)| {
|
||||
let name = self.fields_ids_map.name(id).ok_or(
|
||||
crate::error::FieldIdMapMissingEntry::FieldId {
|
||||
field_id: id,
|
||||
process: "all_obkv_to_rhaimap",
|
||||
},
|
||||
)?;
|
||||
let value = serde_json::from_slice(value)
|
||||
.map_err(crate::error::InternalError::SerdeJson)?;
|
||||
Ok((name.into(), value))
|
||||
})
|
||||
.collect();
|
||||
|
||||
map.map(Some)
|
||||
}
|
||||
|
||||
pub fn json_map(&self, docid: DocumentId) -> Result<Option<Object>> {
|
||||
let obkv = match self.obkv(docid) {
|
||||
Ok(Some(obkv)) => obkv,
|
||||
Ok(None) => return Ok(None),
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
|
||||
all_obkv_to_json(obkv, &self.fields_ids_map).map(Some)
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl Sync for ImmutableObkvs<'_> {}
|
|
@ -48,23 +48,8 @@ pub struct TransformOutput {
|
|||
/// containing all those documents.
|
||||
pub struct Transform<'a, 'i> {
|
||||
pub index: &'i Index,
|
||||
fields_ids_map: FieldsIdsMap,
|
||||
|
||||
indexer_settings: &'a IndexerConfig,
|
||||
pub index_documents_method: IndexDocumentsMethod,
|
||||
available_documents_ids: AvailableIds,
|
||||
|
||||
// Both grenad follows the same format:
|
||||
// key | value
|
||||
// u32 | 1 byte for the Operation byte, the rest is the obkv of the document stored
|
||||
original_sorter: grenad::Sorter<EitherObkvMerge>,
|
||||
flattened_sorter: grenad::Sorter<EitherObkvMerge>,
|
||||
|
||||
replaced_documents_ids: RoaringBitmap,
|
||||
new_documents_ids: RoaringBitmap,
|
||||
// To increase the cache locality and decrease the heap usage we use compact smartstring.
|
||||
new_external_documents_ids_builder: FxHashMap<SmartString<smartstring::Compact>, u64>,
|
||||
documents_count: usize,
|
||||
}
|
||||
|
||||
/// This enum is specific to the grenad sorter stored in the transform.
|
||||
|
@ -75,29 +60,6 @@ pub enum Operation {
|
|||
Deletion,
|
||||
}
|
||||
|
||||
/// Create a mapping between the field ids found in the document batch and the one that were
|
||||
/// already present in the index.
|
||||
///
|
||||
/// If new fields are present in the addition, they are added to the index field ids map.
|
||||
fn create_fields_mapping(
|
||||
index_field_map: &mut FieldsIdsMap,
|
||||
batch_field_map: &DocumentsBatchIndex,
|
||||
) -> Result<HashMap<FieldId, FieldId>> {
|
||||
batch_field_map
|
||||
.iter()
|
||||
// we sort by id here to ensure a deterministic mapping of the fields, that preserves
|
||||
// the original ordering.
|
||||
.sorted_by_key(|(&id, _)| id)
|
||||
.map(|(field, name)| match index_field_map.id(name) {
|
||||
Some(id) => Ok((*field, id)),
|
||||
None => index_field_map
|
||||
.insert(name)
|
||||
.ok_or(Error::UserError(UserError::AttributeLimitReached))
|
||||
.map(|id| (*field, id)),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
impl<'a, 'i> Transform<'a, 'i> {
|
||||
pub fn new(
|
||||
wtxn: &mut heed::RwTxn<'_>,
|
||||
|
@ -138,19 +100,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||
);
|
||||
let documents_ids = index.documents_ids(wtxn)?;
|
||||
|
||||
Ok(Transform {
|
||||
index,
|
||||
fields_ids_map: index.fields_ids_map(wtxn)?,
|
||||
indexer_settings,
|
||||
available_documents_ids: AvailableIds::new(&documents_ids),
|
||||
original_sorter,
|
||||
flattened_sorter,
|
||||
index_documents_method,
|
||||
replaced_documents_ids: RoaringBitmap::new(),
|
||||
new_documents_ids: RoaringBitmap::new(),
|
||||
new_external_documents_ids_builder: FxHashMap::default(),
|
||||
documents_count: 0,
|
||||
})
|
||||
Ok(Transform { index, indexer_settings, index_documents_method })
|
||||
}
|
||||
|
||||
// Flatten a document from the fields ids map contained in self and insert the new
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue