// MeiliSearch/milli/src/update/index_documents/mod.rs

mod extract;
mod helpers;
mod transform;
mod typed_chunk;

use std::collections::HashSet;
use std::io::{Cursor, Read, Seek};
use std::iter::FromIterator;
use std::num::{NonZeroU32, NonZeroUsize};

use crossbeam_channel::{Receiver, Sender};
use heed::types::Str;
use heed::Database;
use log::debug;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use slice_group_by::GroupBy;
use typed_chunk::{write_typed_chunk_into_index, TypedChunk};

pub use self::helpers::{
    as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
    fst_stream_into_vec, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
    sorter_into_lmdb_database, valid_lmdb_key, write_into_lmdb_database, writer_into_reader,
    ClonableMmap, MergeFn,
};
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
pub use self::transform::{Transform, TransformOutput};
use crate::documents::DocumentBatchReader;
pub use crate::update::index_documents::helpers::CursorClonableMmap;
use crate::update::{
    self, Facets, IndexerConfig, UpdateIndexingStep, WordPrefixDocids,
    WordPrefixPairProximityDocids, WordPrefixPositionDocids, WordsPrefixesFst,
};
use crate::{Index, Result, RoaringBitmapCodec, UserError};

/// Expected number of databases merged while the typed chunks are written into the index;
/// `execute_prefix_databases` starts its progress counter from this value.
const MERGED_DATABASE_COUNT: usize = 7;
/// Number of progress steps reported by `execute_prefix_databases`
/// (one per increment of its `databases_seen` counter).
const PREFIX_DATABASE_COUNT: usize = 5;
/// Value reported as `total_databases` in every `MergeDataIntoFinalDatabase` progress event.
const TOTAL_POSTING_DATABASE_COUNT: usize = MERGED_DATABASE_COUNT + PREFIX_DATABASE_COUNT;

/// Summary returned by `IndexDocuments::execute` once a document addition has run.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DocumentAdditionResult {
    /// The number of documents that were indexed during the update.
    pub indexed_documents: u64,
    /// The total number of documents in the index after the update.
    pub number_of_documents: u64,
}

/// How an incoming document is combined with an already indexed document that has
/// the same identifier.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum IndexDocumentsMethod {
    /// Replace the previous document with the new one,
    /// removing all the already known attributes.
    ReplaceDocuments,

    /// Merge the previous version of the document with the new version,
    /// replacing old attribute values with the new ones and adding the new attributes.
    UpdateDocuments,
}

impl Default for IndexDocumentsMethod {
    fn default() -> Self {
        Self::ReplaceDocuments
    }
}

/// Builder that accumulates batches of documents and then writes them into an index.
///
/// Documents are fed with `add_documents` and the index is only updated by `execute`.
pub struct IndexDocuments<'t, 'u, 'i, 'a, F> {
    /// Write transaction through which every read and write of the update goes.
    wtxn: &'t mut heed::RwTxn<'i, 'u>,
    /// The index being updated.
    index: &'i Index,
    /// Per-update tuning options.
    config: IndexDocumentsConfig,
    /// Indexer-wide settings (thread pool, memory and chunk parameters).
    indexer_config: &'a IndexerConfig,
    /// `Some` until `execute` takes it to produce the transform output.
    transform: Option<Transform<'a, 'i>>,
    /// Callback invoked with each `UpdateIndexingStep` progress event.
    progress: F,
    /// Running total of the counts returned by `add_documents`.
    added_documents: u64,
}

/// Optional tuning knobs for a document addition.
///
/// Every `Option` left to `None` keeps the default of the builder it would otherwise
/// be forwarded to.
#[derive(Default, Debug, Clone)]
pub struct IndexDocumentsConfig {
    /// Forwarded to `Facets::level_group_size` when set.
    pub facet_level_group_size: Option<NonZeroUsize>,
    /// Forwarded to `Facets::min_level_size` when set.
    pub facet_min_level_size: Option<NonZeroUsize>,
    /// Forwarded to `WordsPrefixesFst::threshold` when set.
    pub words_prefix_threshold: Option<u32>,
    /// Forwarded to `WordsPrefixesFst::max_prefix_length` when set.
    pub max_prefix_length: Option<usize>,
    /// Forwarded to `WordPrefixPositionDocids::level_group_size` when set.
    pub words_positions_level_group_size: Option<NonZeroU32>,
    /// Forwarded to `WordPrefixPositionDocids::min_level_size` when set.
    pub words_positions_min_level_size: Option<NonZeroU32>,
    /// Replace or merge strategy handed to the `Transform`.
    pub update_method: IndexDocumentsMethod,
    /// Handed to the `Transform`; the tests in this file show that documents without an
    /// identifier are rejected when this is `false` and receive a UUID when it is `true`.
    pub autogenerate_docids: bool,
}

impl<'t, 'u, 'i, 'a, F> IndexDocuments<'t, 'u, 'i, 'a, F>
where
    F: Fn(UpdateIndexingStep) + Sync,
{
    pub fn new(
        wtxn: &'t mut heed::RwTxn<'i, 'u>,
        index: &'i Index,
        indexer_config: &'a IndexerConfig,
        config: IndexDocumentsConfig,
        progress: F,
    ) -> Result<IndexDocuments<'t, 'u, 'i, 'a, F>> {
        let transform = Some(Transform::new(
            wtxn,
            &index,
            indexer_config,
            config.update_method,
            config.autogenerate_docids,
        )?);

        Ok(IndexDocuments {
            transform,
            config,
            indexer_config,
            progress,
            wtxn,
            index,
            added_documents: 0,
        })
    }

    /// Feeds one batch of documents to the builder and returns how many were read.
    ///
    /// The documents are written progressively, so an error leaves the builder in a stale
    /// state and it must then be discarded. An empty batch is accepted and returns `0`.
    ///
    /// # Panics
    ///
    /// Panics if the internal transform has already been taken.
    pub fn add_documents<R>(&mut self, reader: DocumentBatchReader<R>) -> Result<u64>
    where
        R: Read + Seek,
    {
        // An empty batch never reaches the transform.
        if reader.is_empty() {
            return Ok(0);
        }

        let transform = self.transform.as_mut().expect("Invalid document addition state");
        let batch_count = transform.read_documents(reader, self.wtxn, &self.progress)? as u64;

        // Remember the total so that `execute` can tell whether there is anything to index.
        self.added_documents += batch_count;
        Ok(batch_count)
    }

    /// Indexes every document added so far and returns the resulting counts.
    ///
    /// When no document was added, the transform is left untouched and only the current
    /// number of documents is reported.
    ///
    /// # Panics
    ///
    /// Panics if the internal transform has already been taken.
    #[logging_timer::time("IndexDocuments::{}")]
    pub fn execute(mut self) -> Result<DocumentAdditionResult> {
        // Nothing to index: report the current state of the index as is.
        if self.added_documents == 0 {
            return Ok(DocumentAdditionResult {
                indexed_documents: 0,
                number_of_documents: self.index.number_of_documents(self.wtxn)?,
            });
        }

        let transform = self.transform.take().expect("Invalid document addition state");
        let output = transform.output_from_sorter(self.wtxn, &self.progress)?;

        let new_facets = output.compute_real_facets(self.wtxn, self.index)?;
        self.index.put_faceted_fields(self.wtxn, &new_facets)?;

        // New fields may have been introduced, so the searchable fields are recreated
        // whenever the user defined them.
        if let Some(searchable_fields) = self.index.user_defined_searchable_fields(self.wtxn)? {
            // References to the field names cannot be kept while the index is updated,
            // so they are turned into owned strings first.
            let searchable_fields: Vec<String> =
                searchable_fields.into_iter().map(str::to_string).collect();
            self.index.put_all_searchable_fields_from_fields_ids_map(
                self.wtxn,
                &searchable_fields.iter().map(String::as_ref).collect::<Vec<_>>(),
                &output.fields_ids_map,
            )?;
        }

        // The count must be read before `output` is consumed by `execute_raw`.
        let indexed_documents = output.documents_count as u64;
        let number_of_documents = self.execute_raw(output)?;

        Ok(DocumentAdditionResult { indexed_documents, number_of_documents })
    }

    /// Returns the total number of documents in the index after the update.
    #[logging_timer::time("IndexDocuments::{}")]
    pub fn execute_raw(self, output: TransformOutput) -> Result<u64>
    where
        F: Fn(UpdateIndexingStep) + Sync,
    {
        let TransformOutput {
            primary_key,
            fields_ids_map,
            field_distribution,
            external_documents_ids,
            new_documents_ids,
            replaced_documents_ids,
            documents_count,
            original_documents,
            flattened_documents,
        } = output;

        // The fields_ids_map is put back to the store now so the rest of the transaction sees an
        // up to date field map.
        self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;

        let backup_pool;
        let pool = match self.indexer_config.thread_pool {
            Some(ref pool) => pool,
            #[cfg(not(test))]
            None => {
                // We initialize a bakcup pool with the default
                // settings if none have already been set.
                backup_pool = rayon::ThreadPoolBuilder::new().build()?;
                &backup_pool
            }
            #[cfg(test)]
            None => {
                // We initialize a bakcup pool with the default
                // settings if none have already been set.
                backup_pool = rayon::ThreadPoolBuilder::new().num_threads(1).build()?;
                &backup_pool
            }
        };

        let original_documents = grenad::Reader::new(original_documents)?;
        let flattened_documents = grenad::Reader::new(flattened_documents)?;

        // create LMDB writer channel
        let (lmdb_writer_sx, lmdb_writer_rx): (
            Sender<Result<TypedChunk>>,
            Receiver<Result<TypedChunk>>,
        ) = crossbeam_channel::unbounded();

        // get the primary key field id
        let primary_key_id = fields_ids_map.id(&primary_key).unwrap();

        // get searchable fields for word databases
        let searchable_fields =
            self.index.searchable_fields_ids(self.wtxn)?.map(HashSet::from_iter);
        // get filterable fields for facet databases
        let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
        // get the fid of the `_geo.lat` and `_geo.lng` fields.
        let geo_fields_ids = match self.index.fields_ids_map(self.wtxn)?.id("_geo") {
            Some(gfid) => {
                let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid);
                let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid);
                // if `_geo` is faceted then we get the `lat` and `lng`
                if is_sortable || is_filterable {
                    let field_ids = self
                        .index
                        .fields_ids_map(self.wtxn)?
                        .insert("_geo.lat")
                        .zip(self.index.fields_ids_map(self.wtxn)?.insert("_geo.lng"))
                        .ok_or(UserError::AttributeLimitReached)?;
                    Some(field_ids)
                } else {
                    None
                }
            }
            None => None,
        };

        let stop_words = self.index.stop_words(self.wtxn)?;
        let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;

        // Run extraction pipeline in parallel.
        pool.install(|| {
            let params = GrenadParameters {
                chunk_compression_type: self.indexer_config.chunk_compression_type,
                chunk_compression_level: self.indexer_config.chunk_compression_level,
                max_memory: self.indexer_config.max_memory,
                max_nb_chunks: self.indexer_config.max_nb_chunks, // default value, may be chosen.
            };

            // split obkv file into several chunks
            let original_chunk_iter = grenad_obkv_into_chunks(
                original_documents,
                params.clone(),
                self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4), // 4MiB
            );

            // split obkv file into several chunks
            let flattened_chunk_iter = grenad_obkv_into_chunks(
                flattened_documents,
                params.clone(),
                self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4), // 4MiB
            );

            let result = original_chunk_iter
                .and_then(|original_chunk_iter| Ok((original_chunk_iter, flattened_chunk_iter?)))
                .map(|(original_chunk, flattened_chunk)| {
                    // extract all databases from the chunked obkv douments
                    extract::data_from_obkv_documents(
                        original_chunk,
                        flattened_chunk,
                        params,
                        lmdb_writer_sx.clone(),
                        searchable_fields,
                        faceted_fields,
                        primary_key_id,
                        geo_fields_ids,
                        stop_words,
                        self.indexer_config.max_positions_per_attributes,
                        exact_attributes,
                    )
                });

            if let Err(e) = result {
                let _ = lmdb_writer_sx.send(Err(e));
            }

            // needs to be droped to avoid channel waiting lock.
            drop(lmdb_writer_sx)
        });

        // We delete the documents that this document addition replaces. This way we are
        // able to simply insert all the documents even if they already exist in the database.
        if !replaced_documents_ids.is_empty() {
            let mut deletion_builder = update::DeleteDocuments::new(self.wtxn, self.index)?;
            debug!("documents to delete {:?}", replaced_documents_ids);
            deletion_builder.delete_documents(&replaced_documents_ids);
            let deleted_documents_count = deletion_builder.execute()?;
            debug!("{} documents actually deleted", deleted_documents_count.deleted_documents);
        }

        let index_documents_ids = self.index.documents_ids(self.wtxn)?;
        let index_is_empty = index_documents_ids.len() == 0;
        let mut final_documents_ids = RoaringBitmap::new();
        let mut word_pair_proximity_docids = None;
        let mut word_position_docids = None;
        let mut word_docids = None;
        let mut exact_word_docids = None;

        let mut databases_seen = 0;
        (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
            databases_seen,
            total_databases: TOTAL_POSTING_DATABASE_COUNT,
        });

        for result in lmdb_writer_rx {
            let typed_chunk = match result? {
                TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => {
                    let cloneable_chunk = unsafe { as_cloneable_grenad(&word_docids_reader)? };
                    word_docids = Some(cloneable_chunk);
                    let cloneable_chunk =
                        unsafe { as_cloneable_grenad(&exact_word_docids_reader)? };
                    exact_word_docids = Some(cloneable_chunk);
                    TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader }
                }
                TypedChunk::WordPairProximityDocids(chunk) => {
                    let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
                    word_pair_proximity_docids = Some(cloneable_chunk);
                    TypedChunk::WordPairProximityDocids(chunk)
                }
                TypedChunk::WordPositionDocids(chunk) => {
                    let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
                    word_position_docids = Some(cloneable_chunk);
                    TypedChunk::WordPositionDocids(chunk)
                }
                otherwise => otherwise,
            };

            let (docids, is_merged_database) =
                write_typed_chunk_into_index(typed_chunk, &self.index, self.wtxn, index_is_empty)?;
            if !docids.is_empty() {
                final_documents_ids |= docids;
                let documents_seen_count = final_documents_ids.len();
                (self.progress)(UpdateIndexingStep::IndexDocuments {
                    documents_seen: documents_seen_count as usize,
                    total_documents: documents_count,
                });
                debug!(
                    "We have seen {} documents on {} total document so far",
                    documents_seen_count, documents_count
                );
            }
            if is_merged_database {
                databases_seen += 1;
                (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
                    databases_seen,
                    total_databases: TOTAL_POSTING_DATABASE_COUNT,
                });
            }
        }

        // We write the field distribution into the main database
        self.index.put_field_distribution(self.wtxn, &field_distribution)?;

        // We write the primary key field id into the main database
        self.index.put_primary_key(self.wtxn, &primary_key)?;

        // We write the external documents ids into the main database.
        self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;

        let all_documents_ids = index_documents_ids | new_documents_ids | replaced_documents_ids;
        self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;

        self.execute_prefix_databases(
            word_docids,
            exact_word_docids,
            word_pair_proximity_docids,
            word_position_docids,
        )?;

        Ok(all_documents_ids.len())
    }

    #[logging_timer::time("IndexDocuments::{}")]
    pub fn execute_prefix_databases(
        self,
        word_docids: Option<grenad::Reader<CursorClonableMmap>>,
        exact_word_docids: Option<grenad::Reader<CursorClonableMmap>>,
        word_pair_proximity_docids: Option<grenad::Reader<CursorClonableMmap>>,
        word_position_docids: Option<grenad::Reader<CursorClonableMmap>>,
    ) -> Result<()>
    where
        F: Fn(UpdateIndexingStep) + Sync,
    {
        // Merged databases are already been indexed, we start from this count;
        let mut databases_seen = MERGED_DATABASE_COUNT;

        // Run the facets update operation.
        let mut builder = Facets::new(self.wtxn, self.index);
        builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
        builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
        if let Some(value) = self.config.facet_level_group_size {
            builder.level_group_size(value);
        }
        if let Some(value) = self.config.facet_min_level_size {
            builder.min_level_size(value);
        }
        builder.execute()?;

        databases_seen += 1;
        (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
            databases_seen,
            total_databases: TOTAL_POSTING_DATABASE_COUNT,
        });

        let previous_words_prefixes_fst =
            self.index.words_prefixes_fst(self.wtxn)?.map_data(|cow| cow.into_owned())?;

        // Run the words prefixes update operation.
        let mut builder = WordsPrefixesFst::new(self.wtxn, self.index);
        if let Some(value) = self.config.words_prefix_threshold {
            builder.threshold(value);
        }
        if let Some(value) = self.config.max_prefix_length {
            builder.max_prefix_length(value);
        }
        builder.execute()?;

        let current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;

        // We retrieve the common words between the previous and new prefix word fst.
        let common_prefix_fst_words = fst_stream_into_vec(
            previous_words_prefixes_fst.op().add(&current_prefix_fst).intersection(),
        );
        let common_prefix_fst_words: Vec<_> = common_prefix_fst_words
            .as_slice()
            .linear_group_by_key(|x| x.chars().nth(0).unwrap())
            .collect();

        // We retrieve the newly added words between the previous and new prefix word fst.
        let new_prefix_fst_words = fst_stream_into_vec(
            current_prefix_fst.op().add(&previous_words_prefixes_fst).difference(),
        );

        // We compute the set of prefixes that are no more part of the prefix fst.
        let del_prefix_fst_words = fst_stream_into_hashset(
            previous_words_prefixes_fst.op().add(&current_prefix_fst).difference(),
        );

        databases_seen += 1;
        (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
            databases_seen,
            total_databases: TOTAL_POSTING_DATABASE_COUNT,
        });

        if let Some(word_docids) = word_docids {
            execute_word_prefix_docids(
                self.wtxn,
                word_docids,
                self.index.word_docids.clone(),
                self.index.word_prefix_docids.clone(),
                &self.indexer_config,
                &new_prefix_fst_words,
                &common_prefix_fst_words,
                &del_prefix_fst_words,
            )?;
        }

        if let Some(exact_word_docids) = exact_word_docids {
            execute_word_prefix_docids(
                self.wtxn,
                exact_word_docids,
                self.index.exact_word_docids.clone(),
                self.index.exact_word_prefix_docids.clone(),
                &self.indexer_config,
                &new_prefix_fst_words,
                &common_prefix_fst_words,
                &del_prefix_fst_words,
            )?;
        }

        databases_seen += 1;
        (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
            databases_seen,
            total_databases: TOTAL_POSTING_DATABASE_COUNT,
        });

        if let Some(word_pair_proximity_docids) = word_pair_proximity_docids {
            // Run the word prefix pair proximity docids update operation.
            let mut builder = WordPrefixPairProximityDocids::new(self.wtxn, self.index);
            builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
            builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
            builder.max_nb_chunks = self.indexer_config.max_nb_chunks;
            builder.max_memory = self.indexer_config.max_memory;
            builder.execute(
                word_pair_proximity_docids,
                &new_prefix_fst_words,
                &common_prefix_fst_words,
                &del_prefix_fst_words,
            )?;
        }

        databases_seen += 1;
        (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
            databases_seen,
            total_databases: TOTAL_POSTING_DATABASE_COUNT,
        });

        if let Some(word_position_docids) = word_position_docids {
            // Run the words prefix position docids update operation.
            let mut builder = WordPrefixPositionDocids::new(self.wtxn, self.index);
            builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
            builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
            builder.max_nb_chunks = self.indexer_config.max_nb_chunks;
            builder.max_memory = self.indexer_config.max_memory;
            if let Some(value) = self.config.words_positions_level_group_size {
                builder.level_group_size(value);
            }
            if let Some(value) = self.config.words_positions_min_level_size {
                builder.min_level_size(value);
            }
            builder.execute(
                word_position_docids,
                &new_prefix_fst_words,
                &common_prefix_fst_words,
                &del_prefix_fst_words,
            )?;
        }

        databases_seen += 1;
        (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
            databases_seen,
            total_databases: TOTAL_POSTING_DATABASE_COUNT,
        });

        Ok(())
    }
}

/// Refreshes one word-prefix docids database from the word docids written by this update.
///
/// `reader` is turned into a cursor and handed, together with the three prefix sets, to a
/// `WordPrefixDocids` builder configured from `indexer_config`.
fn execute_word_prefix_docids(
    txn: &mut heed::RwTxn,
    reader: grenad::Reader<Cursor<ClonableMmap>>,
    word_docids_db: Database<Str, RoaringBitmapCodec>,
    word_prefix_docids_db: Database<Str, RoaringBitmapCodec>,
    indexer_config: &IndexerConfig,
    new_prefix_fst_words: &[String],
    common_prefix_fst_words: &[&[String]],
    del_prefix_fst_words: &HashSet<Vec<u8>>,
) -> Result<()> {
    // Opening the cursor can fail, so it happens before the builder is created.
    let word_docids_cursor = reader.into_cursor()?;

    let mut prefix_docids = WordPrefixDocids::new(txn, word_docids_db, word_prefix_docids_db);
    prefix_docids.max_memory = indexer_config.max_memory;
    prefix_docids.max_nb_chunks = indexer_config.max_nb_chunks;
    prefix_docids.chunk_compression_level = indexer_config.chunk_compression_level;
    prefix_docids.chunk_compression_type = indexer_config.chunk_compression_type;

    prefix_docids.execute(
        word_docids_cursor,
        new_prefix_fst_words,
        common_prefix_fst_words,
        del_prefix_fst_words,
    )?;
    Ok(())
}

#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use big_s::S;
    use heed::EnvOpenOptions;
    use maplit::hashset;

    use super::*;
    use crate::documents::DocumentBatchBuilder;
    use crate::update::DeleteDocuments;
    use crate::HashMap;

    /// Replacing documents, one at a time or all at once, must never change their number.
    #[test]
    fn simple_document_replacement() {
        let path = tempfile::tempdir().unwrap();
        let mut options = EnvOpenOptions::new();
        options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(options, &path).unwrap();

        // First we send 3 documents with ids from 1 to 3.
        let mut wtxn = index.write_txn().unwrap();
        let content = documents!([
            { "id": 1, "name": "kevin" },
            { "id": 2, "name": "kevina" },
            { "id": 3, "name": "benoit" }
        ]);

        let config = IndexerConfig::default();
        let indexing_config = IndexDocumentsConfig::default();
        let mut builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
                .unwrap();
        builder.add_documents(content).unwrap();
        builder.execute().unwrap();
        wtxn.commit().unwrap();

        // Check that there are 3 documents now.
        let rtxn = index.read_txn().unwrap();
        let count = index.number_of_documents(&rtxn).unwrap();
        assert_eq!(count, 3);
        drop(rtxn);

        // Second we send 1 document with id 1, to replace the previous one.
        let mut wtxn = index.write_txn().unwrap();
        let content = documents!([ { "id": 1, "name": "updated kevin" } ]);
        let mut builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
                .unwrap();
        builder.add_documents(content).unwrap();
        builder.execute().unwrap();
        wtxn.commit().unwrap();

        // Check that there are **still** 3 documents.
        let rtxn = index.read_txn().unwrap();
        let count = index.number_of_documents(&rtxn).unwrap();
        assert_eq!(count, 3);
        drop(rtxn);

        // Third we send 3 documents again to replace the existing ones.
        let mut wtxn = index.write_txn().unwrap();
        let content = documents!([
            { "id": 1, "name": "updated second kevin" },
            { "id": 2, "name": "updated kevina" },
            { "id": 3, "name": "updated benoit" }
        ]);
        let mut builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
        builder.add_documents(content).unwrap();
        // Without this call the batch is never indexed and the assertion below would
        // only re-check the state left by the second stage.
        builder.execute().unwrap();
        wtxn.commit().unwrap();

        // Check that there are **still** 3 documents.
        let rtxn = index.read_txn().unwrap();
        let count = index.number_of_documents(&rtxn).unwrap();
        assert_eq!(count, 3);
        drop(rtxn);
    }

    /// With `UpdateDocuments`, documents sharing an id are merged into a single document,
    /// both within one batch and across batches.
    #[test]
    fn simple_document_merge() {
        let path = tempfile::tempdir().unwrap();
        let mut options = EnvOpenOptions::new();
        options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(options, &path).unwrap();

        // First we send 3 documents with duplicate ids and
        // change the index method to merge documents.
        let mut wtxn = index.write_txn().unwrap();
        let content = documents!([
            { "id": 1, "name": "kevin" },
            { "id": 1, "name": "kevina" },
            { "id": 1, "name": "benoit" }
        ]);
        let config = IndexerConfig::default();
        let indexing_config = IndexDocumentsConfig {
            update_method: IndexDocumentsMethod::UpdateDocuments,
            ..Default::default()
        };
        let mut builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
                .unwrap();
        builder.add_documents(content).unwrap();
        builder.execute().unwrap();
        wtxn.commit().unwrap();

        // Check that there is only 1 document now.
        let rtxn = index.read_txn().unwrap();
        let count = index.number_of_documents(&rtxn).unwrap();
        assert_eq!(count, 1);

        // Check that we get only one document from the database.
        let docs = index.documents(&rtxn, Some(0)).unwrap();
        assert_eq!(docs.len(), 1);
        let (id, doc) = docs[0];
        assert_eq!(id, 0);

        // Check that this document is equal to the last one sent:
        // field 0 holds the id and field 1 holds the name.
        let mut doc_iter = doc.iter();
        assert_eq!(doc_iter.next(), Some((0, &b"1"[..])));
        assert_eq!(doc_iter.next(), Some((1, &br#""benoit""#[..])));
        assert_eq!(doc_iter.next(), None);
        drop(rtxn);

        // Second we send 1 document with id 1, to force it to be merged with the previous one.
        let mut wtxn = index.write_txn().unwrap();
        let content = documents!([ { "id": 1, "age": 25 } ]);
        let mut builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
        builder.add_documents(content).unwrap();
        builder.execute().unwrap();
        wtxn.commit().unwrap();

        // Check that there is **still** 1 document.
        let rtxn = index.read_txn().unwrap();
        let count = index.number_of_documents(&rtxn).unwrap();
        assert_eq!(count, 1);

        // Check that we get only one document from the database.
        // Since the document has been deleted and re-inserted, its internal docid has been incremented to 1
        let docs = index.documents(&rtxn, Some(1)).unwrap();
        assert_eq!(docs.len(), 1);
        let (id, doc) = docs[0];
        assert_eq!(id, 1);

        // Check that the previous name was kept and that the new `age` field (field 2)
        // was added to the document.
        let mut doc_iter = doc.iter();
        assert_eq!(doc_iter.next(), Some((0, &b"1"[..])));
        assert_eq!(doc_iter.next(), Some((1, &br#""benoit""#[..])));
        assert_eq!(doc_iter.next(), Some((2, &b"25"[..])));
        assert_eq!(doc_iter.next(), None);
        drop(rtxn);
    }

    /// Documents without an identifier are rejected when ids are not auto-generated.
    #[test]
    fn not_auto_generated_documents_ids() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let mut env_options = EnvOpenOptions::new();
        env_options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(env_options, &tmp_dir).unwrap();

        // We send 3 documents, none of which carries an id.
        let mut wtxn = index.write_txn().unwrap();
        let batch = documents!([
            { "name": "kevin" },
            { "name": "kevina" },
            { "name": "benoit" }
        ]);
        let indexer_config = IndexerConfig::default();
        let mut builder = IndexDocuments::new(
            &mut wtxn,
            &index,
            &indexer_config,
            IndexDocumentsConfig::default(),
            |_| (),
        )
        .unwrap();

        // The default configuration does not generate ids, so the batch must be refused.
        let outcome = builder.add_documents(batch);
        assert!(outcome.is_err());
        wtxn.commit().unwrap();

        // Check that there is no document.
        let rtxn = index.read_txn().unwrap();
        assert_eq!(index.number_of_documents(&rtxn).unwrap(), 0);
        drop(rtxn);
    }

    /// With `autogenerate_docids`, documents without an id receive a UUID that can later
    /// be used to replace them.
    #[test]
    fn simple_auto_generated_documents_ids() {
        let path = tempfile::tempdir().unwrap();
        let mut options = EnvOpenOptions::new();
        options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(options, &path).unwrap();

        // First we send 3 documents without any id.
        let mut wtxn = index.write_txn().unwrap();
        let content = documents!([
            { "name": "kevin" },
            { "name": "kevina" },
            { "name": "benoit" }
        ]);
        let config = IndexerConfig::default();
        let indexing_config =
            IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
        let mut builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
                .unwrap();
        builder.add_documents(content).unwrap();
        builder.execute().unwrap();
        wtxn.commit().unwrap();

        // Check that there are 3 documents now.
        let rtxn = index.read_txn().unwrap();
        let count = index.number_of_documents(&rtxn).unwrap();
        assert_eq!(count, 3);

        // Find the "kevin" document (field 0 is the name) and read its generated id (field 1).
        let docs = index.documents(&rtxn, vec![0, 1, 2]).unwrap();
        let (_id, obkv) = docs.iter().find(|(_id, kv)| kv.get(0) == Some(br#""kevin""#)).unwrap();
        let kevin_uuid: String = serde_json::from_slice(&obkv.get(1).unwrap()).unwrap();
        drop(rtxn);

        // Second we send 1 document with the generated uuid, to replace the previous one.
        let mut wtxn = index.write_txn().unwrap();
        let content = documents!([ { "name": "updated kevin", "id": kevin_uuid } ]);
        let mut builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
        builder.add_documents(content).unwrap();
        builder.execute().unwrap();
        wtxn.commit().unwrap();

        // Check that there are **still** 3 documents.
        let rtxn = index.read_txn().unwrap();
        let count = index.number_of_documents(&rtxn).unwrap();
        assert_eq!(count, 3);

        // the document 0 has been deleted and reinserted with the id 3
        let docs = index.documents(&rtxn, vec![1, 2, 3]).unwrap();
        let kevin_position =
            docs.iter().position(|(_, d)| d.get(0).unwrap() == br#""updated kevin""#).unwrap();
        assert_eq!(kevin_position, 2);
        let (_, doc) = docs[kevin_position];

        // Check that this document is equal to the last
        // one sent and that it still carries a UUID.
        assert_eq!(doc.get(0), Some(&br#""updated kevin""#[..]));
        // This is a UUID, it must be 36 bytes long plus the 2 surrounding string quotes (").
        assert_eq!(doc.get(1).unwrap().len(), 36 + 2);
        drop(rtxn);
    }

    /// Checks that a document sent without an id, with id generation enabled, is added
    /// next to documents that were previously indexed with explicit ids.
    #[test]
    fn reordered_auto_generated_documents_ids() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let mut env_options = EnvOpenOptions::new();
        env_options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(env_options, &tmp_dir).unwrap();
        let indexer_config = IndexerConfig::default();

        // Step 1: index 3 documents whose ids are explicitly set to 1, 2 and 3.
        let mut wtxn = index.write_txn().unwrap();
        let batch = documents!([
            { "id": 1, "name": "kevin" },
            { "id": 2, "name": "kevina" },
            { "id": 3, "name": "benoit" }
        ]);
        let explicit_ids = IndexDocumentsConfig::default();
        let mut addition =
            IndexDocuments::new(&mut wtxn, &index, &indexer_config, explicit_ids, |_| ()).unwrap();
        addition.add_documents(batch).unwrap();
        addition.execute().unwrap();
        wtxn.commit().unwrap();

        // The index must now hold 3 documents.
        let rtxn = index.read_txn().unwrap();
        assert_eq!(index.number_of_documents(&rtxn).unwrap(), 3);
        drop(rtxn);

        // Step 2: send 1 document without any id and let the indexer generate one.
        let mut wtxn = index.write_txn().unwrap();
        let batch = documents!([ { "name": "new kevin" } ]);
        let generated_ids =
            IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
        let mut addition =
            IndexDocuments::new(&mut wtxn, &index, &indexer_config, generated_ids, |_| ()).unwrap();
        addition.add_documents(batch).unwrap();
        addition.execute().unwrap();
        wtxn.commit().unwrap();

        // The new document was added on top of the previous ones: 4 documents in total.
        let rtxn = index.read_txn().unwrap();
        assert_eq!(index.number_of_documents(&rtxn).unwrap(), 4);
    }

    /// Checks that indexing an empty batch of documents succeeds and leaves the index empty.
    #[test]
    fn empty_update() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let mut env_options = EnvOpenOptions::new();
        env_options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(env_options, &tmp_dir).unwrap();

        // Send a batch that contains 0 documents (only the headers).
        let indexer_config = IndexerConfig::default();
        let empty_batch = documents!([]);
        let mut wtxn = index.write_txn().unwrap();
        let mut addition = IndexDocuments::new(
            &mut wtxn,
            &index,
            &indexer_config,
            IndexDocumentsConfig::default(),
            |_| (),
        )
        .unwrap();
        addition.add_documents(empty_batch).unwrap();
        addition.execute().unwrap();
        wtxn.commit().unwrap();

        // The index must not contain any document.
        let rtxn = index.read_txn().unwrap();
        assert_eq!(index.number_of_documents(&rtxn).unwrap(), 0);
    }

    /// Checks that a document whose id contains a space is rejected when it is added,
    /// and that a document with a valid id can still be indexed afterwards.
    #[test]
    fn invalid_documents_ids() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let mut env_options = EnvOpenOptions::new();
        env_options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(env_options, &tmp_dir).unwrap();

        let indexer_config = IndexerConfig::default();
        let documents_config = IndexDocumentsConfig::default();

        // Step 1: send 1 document with an invalid id (it contains a space).
        let mut wtxn = index.write_txn().unwrap();
        let invalid_batch = documents!([ { "id": "brume bleue", "name": "kevin" } ]);
        let mut addition = IndexDocuments::new(
            &mut wtxn,
            &index,
            &indexer_config,
            documents_config.clone(),
            |_| (),
        )
        .unwrap();
        // The batch must be refused as soon as it is added.
        assert!(addition.add_documents(invalid_batch).is_err());
        wtxn.commit().unwrap();

        // Step 2: send 1 document with a valid (numeric) id.
        let mut wtxn = index.write_txn().unwrap();
        let valid_batch = documents!([ { "id": 32, "name": "kevin" } ]);
        let mut addition =
            IndexDocuments::new(&mut wtxn, &index, &indexer_config, documents_config, |_| ())
                .unwrap();
        addition.add_documents(valid_batch).unwrap();
        addition.execute().unwrap();
        wtxn.commit().unwrap();

        // Only the valid document must have been indexed.
        let rtxn = index.read_txn().unwrap();
        assert_eq!(index.number_of_documents(&rtxn).unwrap(), 1);
    }

    /// Checks that values nested inside objects, arrays and arrays of objects can be found
    /// by a search once the documents are indexed.
    #[test]
    fn complex_documents() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let mut env_options = EnvOpenOptions::new();
        env_options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(env_options, &tmp_dir).unwrap();

        // Send 3 documents, each one nesting its values in a different kind of structure.
        let mut wtxn = index.write_txn().unwrap();
        let batch = documents!([
            { "id": 0, "name": "kevin", "object": { "key1": "value1", "key2": "value2" } },
            { "id": 1, "name": "kevina", "array": ["I", "am", "fine"] },
            { "id": 2, "name": "benoit", "array_of_object": [{ "wow": "amazing" }] }
        ]);
        let indexer_config = IndexerConfig::default();
        let mut addition = IndexDocuments::new(
            &mut wtxn,
            &index,
            &indexer_config,
            IndexDocumentsConfig::default(),
            |_| (),
        )
        .unwrap();
        addition.add_documents(batch).unwrap();
        addition.execute().unwrap();
        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();

        // Each quoted query must return only the document that holds the nested value.
        let nested_values = [
            // a value stored in a sub object
            (r#""value2""#, 0),
            // a value stored in a sub array
            (r#""fine""#, 1),
            // a value stored in an object that is itself inside an array
            (r#""amazing""#, 2),
        ];
        for (query, expected_docid) in nested_values {
            let result = index.search(&rtxn).query(query).execute().unwrap();
            assert_eq!(result.documents_ids, vec![expected_docid]);
        }
    }

    /// Indexes 6 books with the `ReplaceDocuments` method, then sends a partial document
    /// for the id 2 with the `UpdateDocuments` method. The test only requires both
    /// indexing runs to succeed.
    #[test]
    fn simple_documents_replace() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let mut env_options = EnvOpenOptions::new();
        env_options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(env_options, &tmp_dir).unwrap();
        let indexer_config = IndexerConfig::default();

        // Step 1: send 6 documents, all with an explicit id; only two of them have a `_geo`.
        let mut wtxn = index.write_txn().unwrap();
        let books = documents!([
          { "id": 2,    "title": "Pride and Prejudice",                    "author": "Jane Austin",              "genre": "romance",    "price": 3.5, "_geo": { "lat": 12, "lng": 42 } },
          { "id": 456,  "title": "Le Petit Prince",                        "author": "Antoine de Saint-Exupéry", "genre": "adventure" , "price": 10.0 },
          { "id": 1,    "title": "Alice In Wonderland",                    "author": "Lewis Carroll",            "genre": "fantasy",    "price": 25.99 },
          { "id": 1344, "title": "The Hobbit",                             "author": "J. R. R. Tolkien",         "genre": "fantasy" },
          { "id": 4,    "title": "Harry Potter and the Half-Blood Prince", "author": "J. K. Rowling",            "genre": "fantasy" },
          { "id": 42,   "title": "The Hitchhiker's Guide to the Galaxy",   "author": "Douglas Adams", "_geo": { "lat": 35, "lng": 23 } }
        ]);
        let replace_config = IndexDocumentsConfig {
            update_method: IndexDocumentsMethod::ReplaceDocuments,
            ..Default::default()
        };
        let mut replacement =
            IndexDocuments::new(&mut wtxn, &index, &indexer_config, replace_config, |_| ())
                .unwrap();
        replacement.add_documents(books).unwrap();
        replacement.execute().unwrap();
        wtxn.commit().unwrap();

        // Step 2: send a partial version of the document 2, this time as an update.
        let mut wtxn = index.write_txn().unwrap();
        let partial_book = documents!([
          {
            "id": 2,
            "author": "J. Austen",
            "date": "1813"
          }
        ]);
        let update_config = IndexDocumentsConfig {
            update_method: IndexDocumentsMethod::UpdateDocuments,
            ..Default::default()
        };
        let mut partial_update =
            IndexDocuments::new(&mut wtxn, &index, &indexer_config, update_config, |_| ())
                .unwrap();
        partial_update.add_documents(partial_book).unwrap();
        partial_update.execute().unwrap();
        wtxn.commit().unwrap();
    }

    /// Indexes a mix of documents with and without a `_geo` field, then declares `_geo`
    /// as filterable. The test only requires both operations to succeed.
    #[test]
    fn mixed_geo_documents() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let mut env_options = EnvOpenOptions::new();
        env_options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(env_options, &tmp_dir).unwrap();
        let indexer_config = IndexerConfig::default();

        // Send 6 documents, interleaving the ones that have a `_geo` with the ones that don't.
        let mut wtxn = index.write_txn().unwrap();
        let batch = documents!([
          { "id": 2, "price": 3.5, "_geo": { "lat": 12, "lng": 42 } },
          { "id": 456 },
          { "id": 1 },
          { "id": 1344 },
          { "id": 4 },
          { "id": 42, "_geo": { "lat": 35, "lng": 23 } }
        ]);
        let replace_config = IndexDocumentsConfig {
            update_method: IndexDocumentsMethod::ReplaceDocuments,
            ..Default::default()
        };
        let mut addition =
            IndexDocuments::new(&mut wtxn, &index, &indexer_config, replace_config, |_| ())
                .unwrap();
        addition.add_documents(batch).unwrap();
        addition.execute().unwrap();
        wtxn.commit().unwrap();

        // Declare `_geo` as filterable once the documents are already in the index.
        let mut wtxn = index.write_txn().unwrap();
        let mut settings = update::Settings::new(&mut wtxn, &index, &indexer_config);
        settings.set_filterable_fields(hashset!(S("_geo")));
        settings.execute(|_| ()).unwrap();
        wtxn.commit().unwrap();
    }

    /// Checks that every accepted way of writing a geo point is indexed: `lat`/`lng` given
    /// as numbers, as strings or as a single-element array, either nested under `_geo` or
    /// written with the dotted `_geo.lat`/`_geo.lng` keys.
    #[test]
    fn index_all_flavour_of_geo() {
        let path = tempfile::tempdir().unwrap();
        let mut options = EnvOpenOptions::new();
        options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(options, &path).unwrap();

        // `_geo` must be filterable before the documents are sent.
        let config = IndexerConfig::default();
        let mut wtxn = index.write_txn().unwrap();
        let mut builder = update::Settings::new(&mut wtxn, &index, &config);

        builder.set_filterable_fields(hashset!(S("_geo")));
        builder.execute(|_| ()).unwrap();
        wtxn.commit().unwrap();

        let indexing_config = IndexDocumentsConfig {
            update_method: IndexDocumentsMethod::ReplaceDocuments,
            ..Default::default()
        };
        let mut wtxn = index.write_txn().unwrap();

        // All four documents describe the same point (lat 31, lng 42) in a different way.
        let documents = documents!([
          { "id": 0, "_geo": { "lat": 31, "lng": [42] } },
          { "id": 1, "_geo": { "lat": "31" }, "_geo.lng": 42 },
          { "id": 2, "_geo": { "lng": "42" }, "_geo.lat": "31" },
          { "id": 3, "_geo.lat": 31, "_geo.lng": "42" },
        ]);
        // The configuration is used only once: it is moved instead of being cloned.
        let mut builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
        builder.add_documents(documents).unwrap();
        builder.execute().unwrap();
        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();

        // A tiny radius around (31, 42) must return the four documents.
        let mut search = crate::Search::new(&rtxn, &index);
        search.filter(crate::Filter::from_str("_geoRadius(31, 42, 0.000001)").unwrap().unwrap());
        let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
        assert_eq!(documents_ids, vec![0, 1, 2, 3]);
    }

    /// Checks the error message returned for each kind of malformed `_geo` field:
    /// a missing latitude or longitude, and a latitude or longitude that is not a number.
    #[test]
    fn geo_error() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let mut env_options = EnvOpenOptions::new();
        env_options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(env_options, &tmp_dir).unwrap();

        // `_geo` must be filterable before the documents are sent.
        let indexer_config = IndexerConfig::default();
        let mut wtxn = index.write_txn().unwrap();
        let mut settings = update::Settings::new(&mut wtxn, &index, &indexer_config);
        settings.set_filterable_fields(hashset!(S("_geo")));
        settings.execute(|_| ()).unwrap();
        wtxn.commit().unwrap();

        let replace_config = IndexDocumentsConfig {
            update_method: IndexDocumentsMethod::ReplaceDocuments,
            ..Default::default()
        };

        // Every malformed batch, paired with the exact error message it must trigger.
        let failing_batches = [
            (
                documents!([
                  { "id": 0, "_geo": { "lng": 42 } }
                ]),
                r#"Could not find latitude in the document with the id: `0`. Was expecting a `_geo.lat` field."#,
            ),
            (
                documents!([
                  { "id": 0, "_geo": { "lat": 42 } }
                ]),
                r#"Could not find longitude in the document with the id: `0`. Was expecting a `_geo.lng` field."#,
            ),
            (
                documents!([
                  { "id": 0, "_geo": { "lat": "lol", "lng": 42 } }
                ]),
                r#"Could not parse latitude in the document with the id: `0`. Was expecting a number but instead got `"lol"`."#,
            ),
            (
                documents!([
                  { "id": 0, "_geo": { "lat": [12, 13], "lng": 42 } }
                ]),
                r#"Could not parse latitude in the document with the id: `0`. Was expecting a number but instead got `[12,13]`."#,
            ),
            (
                documents!([
                  { "id": 0, "_geo": { "lat": 12, "lng": "hello" } }
                ]),
                r#"Could not parse longitude in the document with the id: `0`. Was expecting a number but instead got `"hello"`."#,
            ),
        ];

        // All the attempts share one write transaction, which is never committed
        // since every indexing run is expected to fail.
        let mut wtxn = index.write_txn().unwrap();
        for (batch, expected_message) in failing_batches {
            let mut addition = IndexDocuments::new(
                &mut wtxn,
                &index,
                &indexer_config,
                replace_config.clone(),
                |_| (),
            )
            .unwrap();
            addition.add_documents(batch).unwrap();
            let error = addition.execute().unwrap_err();
            assert_eq!(&error.to_string(), expected_message);
        }
    }

    /// Checks that a document deleted through its external id can be inserted again, and
    /// then sent once more, all within the same write transaction.
    #[test]
    fn delete_documents_then_insert() {
        let path = tempfile::tempdir().unwrap();
        let mut options = EnvOpenOptions::new();
        options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(options, &path).unwrap();

        let mut wtxn = index.write_txn().unwrap();
        let content = documents!([
            { "objectId": 123, "title": "Pride and Prejudice", "comment": "A great book" },
            { "objectId": 456, "title": "Le Petit Prince",     "comment": "A french book" },
            { "objectId": 1,   "title": "Alice In Wonderland", "comment": "A weird book" },
            { "objectId": 30,  "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
        ]);
        let config = IndexerConfig::default();
        let indexing_config = IndexDocumentsConfig::default();
        let mut builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
                .unwrap();
        builder.add_documents(content).unwrap();
        builder.execute().unwrap();

        // No primary key was configured: `objectId` must have been picked up.
        assert_eq!(index.primary_key(&wtxn).unwrap(), Some("objectId"));

        // Delete not all of the documents but some of them.
        let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
        builder.delete_external_id("30");
        builder.execute().unwrap();

        // The external id must be gone once the deletion is done.
        let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
        assert!(external_documents_ids.get("30").is_none());

        // Insert the deleted document again: its external id must be known again.
        let content = documents!([
            { "objectId": 30,  "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
        ]);

        let mut builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
                .unwrap();
        builder.add_documents(content).unwrap();
        builder.execute().unwrap();
        let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
        assert!(external_documents_ids.get("30").is_some());

        // Send the very same document one more time: indexing must still succeed.
        let content = documents!([
            { "objectId": 30,  "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
        ]);

        // Last use of the configuration: it is moved instead of being cloned.
        let mut builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
        builder.add_documents(content).unwrap();
        builder.execute().unwrap();

        wtxn.commit().unwrap();
    }

    /// Checks that a single document holding 1001 distinct fields (an `id` plus the
    /// fields named `0` to `999`) can be indexed.
    #[test]
    fn index_more_than_256_fields() {
        let path = tempfile::tempdir().unwrap();
        let mut options = EnvOpenOptions::new();
        options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(options, &path).unwrap();

        let mut wtxn = index.write_txn().unwrap();

        // Build the document: one `id` and 1000 fields that all hold the same text.
        let mut big_object = HashMap::new();
        big_object.insert(S("id"), "wow");
        big_object.extend((0..1000).map(|i| (i.to_string(), "I am a text!")));

        // Serialize the document to JSON and feed it to a document batch written in memory.
        let mut cursor = Cursor::new(Vec::new());

        let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
        let big_object = Cursor::new(serde_json::to_vec(&big_object).unwrap());
        builder.extend_from_json(big_object).unwrap();
        builder.finish().unwrap();
        // Rewind the cursor so that the batch can be read back from its beginning.
        cursor.set_position(0);
        let content = DocumentBatchReader::from_reader(cursor).unwrap();

        let config = IndexerConfig::default();
        let indexing_config = IndexDocumentsConfig::default();
        // The configuration is used only once: it is moved instead of being cloned.
        let mut builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
        builder.add_documents(content).unwrap();
        builder.execute().unwrap();

        wtxn.commit().unwrap();
    }

    /// Checks that a field containing far more than 1000 words (every number from 0 to
    /// `u16::MAX`) is indexed, and that words from the start, the middle and the very end
    /// of the field are all present in the `word_docids` database.
    #[test]
    fn index_more_than_1000_positions_in_a_field() {
        let path = tempfile::tempdir().unwrap();
        let mut options = EnvOpenOptions::new();
        options.map_size(50 * 1024 * 1024); // 50 MB
        let index = Index::new(options, &path).unwrap();

        let mut wtxn = index.write_txn().unwrap();

        // One field holding every number from 0 to u16::MAX, separated by single spaces.
        let content = (0..=u16::MAX).map(|p| p.to_string()).collect::<Vec<_>>().join(" ");
        let mut big_object = HashMap::new();
        big_object.insert(S("id"), "wow");
        big_object.insert("content".to_string(), content.as_str());

        // Serialize the document to JSON and feed it to a document batch written in memory.
        let mut cursor = Cursor::new(Vec::new());

        let big_object = serde_json::to_string(&big_object).unwrap();
        let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
        builder.extend_from_json(&mut big_object.as_bytes()).unwrap();
        builder.finish().unwrap();
        // Rewind the cursor so that the batch can be read back from its beginning.
        cursor.set_position(0);
        let content = DocumentBatchReader::from_reader(cursor).unwrap();

        let config = IndexerConfig::default();
        let indexing_config = IndexDocumentsConfig::default();
        // The configuration is used only once: it is moved instead of being cloned.
        let mut builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
        builder.add_documents(content).unwrap();
        builder.execute().unwrap();

        wtxn.commit().unwrap();

        // Reading only needs a shared borrow of the read transaction.
        let rtxn = index.read_txn().unwrap();

        for word in ["0", "64", "256", "1024", "32768", "65535"] {
            assert!(
                index.word_docids.get(&rtxn, word).unwrap().is_some(),
                "the word {:?} is missing from word_docids",
                word
            );
        }
    }

    /// Checks that documents whose values contain NUL bytes (`\0`), and whose field names
    /// contain characters such as `{` or `$`, can be indexed without error.
    #[test]
    fn index_documents_with_zeroes() {
        let path = tempfile::tempdir().unwrap();
        let mut options = EnvOpenOptions::new();
        options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(options, &path).unwrap();

        let mut wtxn = index.write_txn().unwrap();
        // Only the last document contains NUL bytes, in its `title` and its `genre`.
        let content = documents!([
            {
                "id": 2,
                "title": "Prideand Prejudice",
                "au{hor": "Jane Austin",
                "genre": "romance",
                "price$": "3.5$",
            },
            {
                "id": 456,
                "title": "Le Petit Prince",
                "au{hor": "Antoine de Saint-Exupéry",
                "genre": "adventure",
                "price$": "10.0$",
            },
            {
                "id": 1,
                "title": "Wonderland",
                "au{hor": "Lewis Carroll",
                "genre": "fantasy",
                "price$": "25.99$",
            },
            {
                "id": 4,
                "title": "Harry Potter ing fantasy\0lood Prince",
                "au{hor": "J. K. Rowling",
                "genre": "fantasy\0",
            },
        ]);

        let config = IndexerConfig::default();
        let indexing_config = IndexDocumentsConfig::default();
        // The configuration is used only once: it is moved instead of being cloned.
        let mut builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
        builder.add_documents(content).unwrap();
        builder.execute().unwrap();

        wtxn.commit().unwrap();
    }

    /// Indexes documents in which `nested` is in turn absent, an object, an array mixing
    /// a string with objects, and a plain string. Then checks that only the sub-fields
    /// explicitly listed in the settings (`nested.object` and `nested.machin`) are
    /// searchable and filterable, while `nested` itself is neither.
    #[test]
    fn index_documents_with_nested_fields() {
        let path = tempfile::tempdir().unwrap();
        let mut options = EnvOpenOptions::new();
        options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(options, &path).unwrap();

        let mut wtxn = index.write_txn().unwrap();
        let content = documents!([
            {
                "id": 0,
                "title": "The zeroth document",
            },
            {
                "id": 1,
                "title": "The first document",
                "nested": {
                    "object": "field",
                    "machin": "bidule",
                },
            },
            {
                "id": 2,
                "title": "The second document",
                "nested": [
                    "array",
                    {
                        "object": "field",
                    },
                    {
                        "prout": "truc",
                        "machin": "lol",
                    },
                ],
            },
            {
                "id": 3,
                "title": "The third document",
                "nested": "I lied",
            },
        ]);

        let config = IndexerConfig::default();
        let indexing_config = IndexDocumentsConfig::default();
        // The configuration is used only once: it is moved instead of being cloned.
        let mut builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
        builder.add_documents(content).unwrap();
        builder.execute().unwrap();

        wtxn.commit().unwrap();

        // Restrict the searchable and the filterable fields to the title and two sub-fields.
        let mut wtxn = index.write_txn().unwrap();
        let mut builder = update::Settings::new(&mut wtxn, &index, &config);

        let searchable_fields = vec![S("title"), S("nested.object"), S("nested.machin")];
        builder.set_searchable_fields(searchable_fields);

        let faceted_fields = hashset!(S("title"), S("nested.object"), S("nested.machin"));
        builder.set_filterable_fields(faceted_fields);
        builder.execute(|_| ()).unwrap();
        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();

        let facets = index.faceted_fields(&rtxn).unwrap();
        assert_eq!(facets, hashset!(S("title"), S("nested.object"), S("nested.machin")));

        // testing the simple query search
        let mut search = crate::Search::new(&rtxn, &index);
        search.query("document");
        search.authorize_typos(true);
        search.optional_words(true);
        // all documents should be returned
        let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
        assert_eq!(documents_ids.len(), 4);

        // Queries that must match, paired with the ids of the documents they must return.
        let matching_queries = [
            ("zeroth", vec![0]),
            ("first", vec![1]),
            ("second", vec![2]),
            ("third", vec![3]),
            // a value stored under `nested.object`
            ("field", vec![1, 2]),
            // a value stored under `nested.machin`
            ("lol", vec![2]),
        ];
        for (query, expected) in matching_queries {
            search.query(query);
            let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
            assert_eq!(documents_ids, expected, "unexpected results for the query {:?}", query);
        }

        // Queries that must not match: `object` only appears as a key, while `array` and
        // `lied` are values of `nested` itself, which is not searchable.
        for query in ["object", "array", "lied"] {
            search.query(query);
            let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
            assert!(documents_ids.is_empty(), "unexpected results for the query {:?}", query);
        }

        // testing the filters
        let mut search = crate::Search::new(&rtxn, &index);
        let accepted_filters = [
            (r#"title = "The first document""#, vec![1]),
            (r#"nested.object = field"#, vec![1, 2]),
            (r#"nested.machin = bidule"#, vec![1]),
        ];
        for (expression, expected) in accepted_filters {
            search.filter(crate::Filter::from_str(expression).unwrap().unwrap());
            let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
            assert_eq!(documents_ids, expected, "unexpected results for the filter {:?}", expression);
        }

        // `nested` itself is not filterable: filtering on it must be refused.
        for expression in [r#"nested = array"#, r#"nested = "I lied""#] {
            search.filter(crate::Filter::from_str(expression).unwrap().unwrap());
            // `map` discards the success value before `unwrap_err` is called on the result.
            let error = search.execute().map(|_| unreachable!()).unwrap_err();
            assert!(matches!(error, crate::Error::UserError(crate::UserError::InvalidFilter(_))));
        }
    }

    /// Checks that the nested primary key `complex.nested.id` is found whichever way the
    /// document spells it: fully nested objects, partially dotted keys, or one fully
    /// dotted key.
    #[test]
    fn index_documents_with_nested_primary_key() {
        let path = tempfile::tempdir().unwrap();
        let mut options = EnvOpenOptions::new();
        options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(options, &path).unwrap();
        let config = IndexerConfig::default();

        // Declare the nested primary key before any document is sent.
        let mut wtxn = index.write_txn().unwrap();
        let mut builder = update::Settings::new(&mut wtxn, &index, &config);
        builder.set_primary_key("complex.nested.id".to_owned());
        builder.execute(|_| ()).unwrap();
        wtxn.commit().unwrap();

        let mut wtxn = index.write_txn().unwrap();
        // Each document writes the path to `complex.nested.id` in a different way.
        let content = documents!([
            {
                "complex": {
                    "nested": {
                        "id": 0,
                    },
                },
                "title": "The zeroth document",
            },
            {
                "complex.nested": {
                    "id": 1,
                },
                "title": "The first document",
            },
            {
                "complex": {
                    "nested.id": 2,
                },
                "title": "The second document",
            },
            {
                "complex.nested.id": 3,
                "title": "The third document",
            },
        ]);

        let indexing_config = IndexDocumentsConfig::default();
        // The configuration is used only once: it is moved instead of being cloned.
        let mut builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
        builder.add_documents(content).unwrap();
        builder.execute().unwrap();
        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();

        // testing the simple query search
        let mut search = crate::Search::new(&rtxn, &index);
        search.query("document");
        search.authorize_typos(true);
        search.optional_words(true);
        // all documents should be returned
        let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
        assert_eq!(documents_ids.len(), 4);

        // Each title word must return the single document it belongs to.
        for (query, expected) in
            [("zeroth", vec![0]), ("first", vec![1]), ("second", vec![2]), ("third", vec![3])]
        {
            search.query(query);
            let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
            assert_eq!(documents_ids, expected, "unexpected results for the query {:?}", query);
        }
    }

    /// Indexes four documents that each spell the `dog.race.bernese mountain`
    /// attribute with a different mix of nested objects and dotted keys, then
    /// checks the faceted fields reported for filterable and sortable settings,
    /// and that filtering and sorting on the flattened name return the expected
    /// documents.
    #[test]
    fn test_facets_generation() {
        let dir = tempfile::tempdir().unwrap();
        let mut env_options = EnvOpenOptions::new();
        env_options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(env_options, &dir).unwrap();

        let mut wtxn = index.write_txn().unwrap();
        let documents = documents!([
            {
                "id": 0,
                "dog": {
                    "race": {
                        "bernese mountain": "zeroth",
                    },
                },
            },
            {
                "id": 1,
                "dog.race": {
                    "bernese mountain": "first",
                },
            },
            {
                "id": 2,
                "dog.race.bernese mountain": "second",
            },
            {
                "id": 3,
                "dog": {
                    "race.bernese mountain": "third"
                },
            },
        ]);

        // Index the documents before any facet-related setting is applied.
        let config = IndexerConfig::default();
        let mut addition = IndexDocuments::new(
            &mut wtxn,
            &index,
            &config,
            IndexDocumentsConfig::default(),
            |_| (),
        )
        .unwrap();
        addition.add_documents(documents).unwrap();
        addition.execute().unwrap();
        wtxn.commit().unwrap();

        // ---- FILTERABLE: declare `dog` as a filterable field

        let mut wtxn = index.write_txn().unwrap();
        let mut settings = update::Settings::new(&mut wtxn, &index, &config);
        settings.set_filterable_fields(hashset!(S("dog")));
        settings.execute(|_| ()).unwrap();
        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();

        // The nested attributes below `dog` must be reported as faceted too.
        let facets = index.faceted_fields(&rtxn).unwrap();
        assert_eq!(facets, hashset!(S("dog"), S("dog.race"), S("dog.race.bernese mountain")));

        // Whatever the original spelling was, each document must be reachable
        // through the fully flattened attribute name.
        for (s, expected_id) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] {
            let filter = format!(r#""dog.race.bernese mountain" = {s}"#);
            let mut search = crate::Search::new(&rtxn, &index);
            search.filter(crate::Filter::from_str(&filter).unwrap().unwrap());
            let result = search.execute().unwrap();
            assert_eq!(result.documents_ids, vec![expected_id]);
        }

        // ---- RESET: without filterable fields nothing is faceted anymore

        let mut wtxn = index.write_txn().unwrap();
        let mut settings = update::Settings::new(&mut wtxn, &index, &config);
        settings.reset_filterable_fields();
        settings.execute(|_| ()).unwrap();
        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();

        let facets = index.faceted_fields(&rtxn).unwrap();
        assert_eq!(facets, hashset!());

        // ---- SORTABLE: declare `dog.race` as a sortable field

        let mut wtxn = index.write_txn().unwrap();
        let mut settings = update::Settings::new(&mut wtxn, &index, &config);
        settings.set_sortable_fields(hashset!(S("dog.race")));
        settings.execute(|_| ()).unwrap();
        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();

        let facets = index.faceted_fields(&rtxn).unwrap();
        assert_eq!(facets, hashset!(S("dog.race"), S("dog.race.bernese mountain")));

        // Ascending order of the values: "first", "second", "third", "zeroth".
        let sorted_field = crate::Member::Field(S("dog.race.bernese mountain"));
        let mut search = crate::Search::new(&rtxn, &index);
        search.sort_criteria(vec![crate::AscDesc::Asc(sorted_field)]);
        let result = search.execute().unwrap();
        assert_eq!(result.documents_ids, vec![1, 2, 3, 0]);
    }

    /// Indexes the same four documents twice, with an empty batch in between,
    /// and checks that the index holds exactly four documents after every step.
    #[test]
    fn index_2_times_documents_split_by_zero_document_indexation() {
        let dir = tempfile::tempdir().unwrap();
        let mut env_options = EnvOpenOptions::new();
        env_options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(env_options, &dir).unwrap();
        let config = IndexerConfig::default();

        // First round: four brand new documents.
        let first_batch = documents!([
            {"id": 0, "name": "Kerollmops", "score": 78},
            {"id": 1, "name": "ManyTheFish", "score": 75},
            {"id": 2, "name": "Ferdi", "score": 39},
            {"id": 3, "name": "Tommy", "score": 33}
        ]);

        let mut wtxn = index.write_txn().unwrap();
        let mut addition = IndexDocuments::new(
            &mut wtxn,
            &index,
            &config,
            IndexDocumentsConfig::default(),
            |_| (),
        )
        .unwrap();
        addition.add_documents(first_batch).unwrap();
        addition.execute().unwrap();
        wtxn.commit().unwrap();

        // The index must now contain four documents.
        let rtxn = index.read_txn().unwrap();
        let document_count = index.number_of_documents(&rtxn).unwrap();
        assert_eq!(document_count, 4);

        // Second round: an empty batch.
        let empty_batch = documents!([]);

        let mut wtxn = index.write_txn().unwrap();
        let mut addition = IndexDocuments::new(
            &mut wtxn,
            &index,
            &config,
            IndexDocumentsConfig::default(),
            |_| (),
        )
        .unwrap();
        addition.add_documents(empty_batch).unwrap();
        addition.execute().unwrap();
        wtxn.commit().unwrap();

        // The index must still contain four documents.
        let rtxn = index.read_txn().unwrap();
        let document_count = index.number_of_documents(&rtxn).unwrap();
        assert_eq!(document_count, 4);

        // Third round: the very same four documents again.
        let second_batch = documents!([
            {"id": 0, "name": "Kerollmops", "score": 78},
            {"id": 1, "name": "ManyTheFish", "score": 75},
            {"id": 2, "name": "Ferdi", "score": 39},
            {"id": 3, "name": "Tommy", "score": 33}
        ]);

        let mut wtxn = index.write_txn().unwrap();
        let mut addition = IndexDocuments::new(
            &mut wtxn,
            &index,
            &config,
            IndexDocumentsConfig::default(),
            |_| (),
        )
        .unwrap();
        addition.add_documents(second_batch).unwrap();
        addition.execute().unwrap();
        wtxn.commit().unwrap();

        // The index must still contain four documents.
        let rtxn = index.read_txn().unwrap();
        let document_count = index.number_of_documents(&rtxn).unwrap();
        assert_eq!(document_count, 4);
    }

    /// Indexes two documents with Chinese titles and checks that the words
    /// "化妆包" and "包" are each attached to exactly one document, and that
    /// searching for "化妆包" returns a single document.
    #[test]
    fn test_meilisearch_1714() {
        let dir = tempfile::tempdir().unwrap();
        let mut env_options = EnvOpenOptions::new();
        env_options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(env_options, &dir).unwrap();

        let documents = documents!([
          {"id": "123", "title": "小化妆包" },
          {"id": "456", "title": "Ipad 包" }
        ]);

        let config = IndexerConfig::default();
        let mut wtxn = index.write_txn().unwrap();
        let mut addition = IndexDocuments::new(
            &mut wtxn,
            &index,
            &config,
            IndexDocumentsConfig::default(),
            |_| (),
        )
        .unwrap();
        addition.add_documents(documents).unwrap();
        addition.execute().unwrap();
        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();

        // Only the first document should match.
        let first_word_docids = index.word_docids.get(&rtxn, "化妆包").unwrap().unwrap();
        assert_eq!(first_word_docids.len(), 1);

        // Only the second document should match.
        let second_word_docids = index.word_docids.get(&rtxn, "包").unwrap().unwrap();
        assert_eq!(second_word_docids.len(), 1);

        let mut search = crate::Search::new(&rtxn, &index);
        search.query("化妆包");
        search.authorize_typos(true);
        search.optional_words(true);

        // Exactly one document should be returned.
        let result = search.execute().unwrap();
        assert_eq!(result.documents_ids.len(), 1);
    }

    /// Indexing documents whose titles contain overly long words
    /// (250, 256 and 512 repeated characters) must succeed without any error.
    #[test]
    fn text_with_too_long_words() {
        let dir = tempfile::tempdir().unwrap();
        let mut env_options = EnvOpenOptions::new();
        env_options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(env_options, &dir).unwrap();

        let documents = documents!([
          {"id": 1, "title": "a".repeat(256) },
          {"id": 2, "title": "b".repeat(512) },
          {"id": 3, "title": format!("{} {}", "c".repeat(250), "d".repeat(250)) },
        ]);

        // Every step is unwrapped: any indexing error fails the test.
        let config = IndexerConfig::default();
        let mut wtxn = index.write_txn().unwrap();
        let mut addition = IndexDocuments::new(
            &mut wtxn,
            &index,
            &config,
            IndexDocumentsConfig::default(),
            |_| (),
        )
        .unwrap();
        addition.add_documents(documents).unwrap();
        addition.execute().unwrap();
        wtxn.commit().unwrap();
    }

    /// Indexes 200 documents and then one more, all holding the same very long
    /// space-free string, inside a single write transaction. The test only
    /// requires that every indexing step completes without error.
    #[test]
    fn text_with_too_long_keys() {
        let dir = tempfile::tempdir().unwrap();
        let mut env_options = EnvOpenOptions::new();
        env_options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(env_options, &dir).unwrap();
        let script = "https://bug.example.com/meilisearch/milli.saml2?ROLE=Programmer-1337&SAMLRequest=Cy1ytcZT1Po%2L2IY2y9Unru8rgnW4qWfPiI0EpT7P8xjJV8PeQikRL%2E8D9A4pj9tmbymbQCQwGmGjPMK7qwXFPX4DH52JO2b7n6TXjuR7zkIFuYdzdY2rwRNBPgCL7ihclEm9zyIjKZQ%2JTqiwfXxWjnI0KEYQYHdwd6Q%2Fx%28BDLNsvmL54CCY2F4RWeRs4eqWfn%2EHqxlhreFzax4AiQ2tgOtV5thOaaWqrhZD%2Py70nuyZWNTKwciGI43AoHg6PThANsQ5rAY5amzN%2ufbs1swETUXlLZuOut5YGpYPZfY6STJWNp4QYSUOUXBZpdElYsH7UHZ7VhJycgyt%28aTK0GW6GbKne2tJM0hgSczOqndg6RFa9WsnSBi4zMcaEfYur4WlSsHDYInF9ROousKqVMZ6H8%2gbUissaLh1eXRGo8KEJbyEHbhVVKGD%28kx4cfKjx9fT3pkeDTdvDrVn25jIzi9wHyt9l1lWc8ICnCvXCVUPP%2BjBG4wILR29gMV9Ux2QOieQm2%2Fycybhr8sBGCl30mHC7blvWt%2T3mrCHQoS3VK49PZNPqBZO9C7vOjOWoszNkJx4QckWV%2FZFvbpzUUkiBiehr9F%2FvQSxz9lzv68GwbTu9fr638p%2FQM%3D&RelayState=https%3A%2F%example.bug.com%2Fde&SigAlg=http%3A%2F%2Fwww.w3.org%2F2000%2F09%2Fxmldsig%23rsa-sha1&Signature=AZFpkhFFII7PodiewTovaGnLQKUVZp0qOCCcBIUkJ6P5by3lE3Lldj9pKaFu4wz4j%2B015HEhDvF0LlAmwwES85vdGh%2FpD%2cIQPRUEjdCbQkQDd3dy1mMXbpXxSe4QYcv9Ni7tqNTQxekpO1gE7rtg6zC66EU55uM9aj9abGQ034Vly%2F6IJ08bvAq%2B%2FB9KruLstuiNWnlXTfNGsOxGLK7%2BXr94LTkat8m%2FMan6Qr95%2KeR5TmmqaQIE4N9H6o4TopT7mXr5CF2Z3";

        // Build a batch of 200 documents that all carry the long text.
        let large_batch = {
            let documents: Vec<_> = (0..200i32)
                .map(|i| serde_json::json!({ "id": i, "script": script }))
                .collect();
            let json = serde_json::to_vec(&documents).unwrap();

            let mut buffer = std::io::Cursor::new(Vec::new());
            let mut batch = crate::documents::DocumentBatchBuilder::new(&mut buffer).unwrap();
            batch.extend_from_json(std::io::Cursor::new(json)).unwrap();
            batch.finish().unwrap();
            // Rewind so the reader starts from the beginning of the batch.
            buffer.set_position(0);
            crate::documents::DocumentBatchReader::from_reader(buffer).unwrap()
        };

        // Index the 200 long documents.
        let config = IndexerConfig::default();
        let mut wtxn = index.write_txn().unwrap();
        let mut addition = IndexDocuments::new(
            &mut wtxn,
            &index,
            &config,
            IndexDocumentsConfig::default(),
            |_| (),
        )
        .unwrap();
        addition.add_documents(large_batch).unwrap();
        addition.execute().unwrap();

        // Build a batch made of a single long document.
        let single_document = documents!([
          {"id": 400, "script": script },
        ]);

        // Index it within the same, still uncommitted, write transaction.
        let mut addition = IndexDocuments::new(
            &mut wtxn,
            &index,
            &config,
            IndexDocumentsConfig::default(),
            |_| (),
        )
        .unwrap();
        addition.add_documents(single_document).unwrap();
        addition.execute().unwrap();

        wtxn.commit().unwrap();
    }

    /// Feeds two separate document batches to a single builder before executing
    /// it, then checks that every external document id maps to a distinct value.
    #[test]
    fn index_documents_in_multiple_transforms() {
        let tmp = tempfile::tempdir().unwrap();
        let mut options = EnvOpenOptions::new();
        options.map_size(4096 * 100);
        // Borrow the temporary directory, as the sibling tests do: passing it by
        // value moves the guard into `Index::new`, so this test would no longer
        // own the directory while the index is in use.
        let index = Index::new(options, &tmp).unwrap();
        let mut wtxn = index.write_txn().unwrap();
        let indexer_config = IndexerConfig::default();
        let mut builder = IndexDocuments::new(
            &mut wtxn,
            &index,
            &indexer_config,
            IndexDocumentsConfig::default(),
            |_| (),
        )
        .unwrap();

        let doc1 = documents! {[{
            "id": 228142,
            "title": "asdsad",
            "state": "automated",
            "priority": "normal",
            "public_uid": "37ccf021",
            "project_id": 78207,
            "branch_id_number": 0
        }]};

        let doc2 = documents! {[{
            "id": 228143,
            "title": "something",
            "state": "automated",
            "priority": "normal",
            "public_uid": "39c6499b",
            "project_id": 78207,
            "branch_id_number": 0
        }]};

        // Both batches go through the same builder before a single `execute`.
        builder.add_documents(doc1).unwrap();
        builder.add_documents(doc2).unwrap();

        builder.execute().unwrap();

        // The write transaction is intentionally left uncommitted: the mapping is
        // read back through it.
        let map = index.external_documents_ids(&wtxn).unwrap().to_hash_map();
        let ids = map.values().collect::<HashSet<_>>();

        // As many distinct values as entries: no two external ids share a value.
        assert_eq!(ids.len(), map.len());
    }

    /// Indexes the documents first and only afterwards declares `colour` as
    /// filterable, then checks which documents the `facet_id_exists_docids`
    /// database lists for `colour` and `colour.green`.
    #[test]
    fn index_documents_check_exists_database_reindex() {
        let dir = tempfile::tempdir().unwrap();
        let mut env_options = EnvOpenOptions::new();
        env_options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(env_options, &dir).unwrap();

        let mut wtxn = index.write_txn().unwrap();
        let documents = documents!([
            {
                "id": 0,
                "colour": 0,
            },
            {
                "id": 1,
                "colour": []
            },
            {
                "id": 2,
                "colour": {}
            },
            {
                "id": 3,
                "colour": null
            },
            {
                "id": 4,
                "colour": [1]
            },
            {
                "id": 5
            },
            {
                "id": 6,
                "colour": {
                    "green": 1
                }
            }
        ]);

        // Step 1: index the documents while no field is filterable yet.
        let config = IndexerConfig::default();
        let mut addition = IndexDocuments::new(
            &mut wtxn,
            &index,
            &config,
            IndexDocumentsConfig::default(),
            |_| (),
        )
        .unwrap();
        addition.add_documents(documents).unwrap();
        addition.execute().unwrap();
        wtxn.commit().unwrap();

        // Step 2: make `colour` filterable on the already populated index.
        let mut wtxn = index.write_txn().unwrap();
        let mut settings = update::Settings::new(&mut wtxn, &index, &config);
        settings.set_filterable_fields(hashset!(S("colour")));
        settings.execute(|_| ()).unwrap();
        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();
        let facets = index.faceted_fields(&rtxn).unwrap();
        assert_eq!(facets, hashset!(S("colour"), S("colour.green")));

        let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
        let colour_id = fields_ids_map.id("colour").unwrap();
        let colour_green_id = fields_ids_map.id("colour.green").unwrap();

        // Every document carrying a `colour` key is listed, whatever its value
        // (number, empty array, empty object, null, ...); document 5 has no such key.
        let colour_docids: Vec<_> = index
            .facet_id_exists_docids
            .get(&rtxn, &colour_id)
            .unwrap()
            .unwrap()
            .into_iter()
            .collect();
        assert_eq!(colour_docids, vec![0, 1, 2, 3, 4, 6]);

        // Only document 6 has a `colour.green` attribute.
        let colour_green_docids: Vec<_> = index
            .facet_id_exists_docids
            .get(&rtxn, &colour_green_id)
            .unwrap()
            .unwrap()
            .into_iter()
            .collect();
        assert_eq!(colour_green_docids, vec![6]);
    }

    /// Declares `colour` as filterable on an empty index, indexes the documents
    /// afterwards, then checks which documents the `facet_id_exists_docids`
    /// database lists for `colour` and `colour.green`.
    #[test]
    fn index_documents_check_exists_database() {
        let dir = tempfile::tempdir().unwrap();
        let mut env_options = EnvOpenOptions::new();
        env_options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(env_options, &dir).unwrap();

        let config = IndexerConfig::default();

        // Step 1: make `colour` filterable before any document is indexed.
        let mut wtxn = index.write_txn().unwrap();
        let mut settings = update::Settings::new(&mut wtxn, &index, &config);
        settings.set_filterable_fields(hashset!(S("colour")));
        settings.execute(|_| ()).unwrap();
        wtxn.commit().unwrap();

        let documents = documents!([
            {
                "id": 0,
                "colour": 0,
            },
            {
                "id": 1,
                "colour": []
            },
            {
                "id": 2,
                "colour": {}
            },
            {
                "id": 3,
                "colour": null
            },
            {
                "id": 4,
                "colour": [1]
            },
            {
                "id": 5
            },
            {
                "id": 6,
                "colour": {
                    "green": 1
                }
            }
        ]);

        // Step 2: index the documents with the setting already in place.
        let mut wtxn = index.write_txn().unwrap();
        let mut addition = IndexDocuments::new(
            &mut wtxn,
            &index,
            &config,
            IndexDocumentsConfig::default(),
            |_| (),
        )
        .unwrap();
        addition.add_documents(documents).unwrap();
        addition.execute().unwrap();
        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();
        let facets = index.faceted_fields(&rtxn).unwrap();
        assert_eq!(facets, hashset!(S("colour"), S("colour.green")));

        let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
        let colour_id = fields_ids_map.id("colour").unwrap();
        let colour_green_id = fields_ids_map.id("colour.green").unwrap();

        // Every document carrying a `colour` key is listed, whatever its value
        // (number, empty array, empty object, null, ...); document 5 has no such key.
        let colour_docids: Vec<_> = index
            .facet_id_exists_docids
            .get(&rtxn, &colour_id)
            .unwrap()
            .unwrap()
            .into_iter()
            .collect();
        assert_eq!(colour_docids, vec![0, 1, 2, 3, 4, 6]);

        // Only document 6 has a `colour.green` attribute.
        let colour_green_docids: Vec<_> = index
            .facet_id_exists_docids
            .get(&rtxn, &colour_green_id)
            .unwrap()
            .unwrap()
            .into_iter()
            .collect();
        assert_eq!(colour_green_docids, vec![6]);
    }
}
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
+								mod extract;
 								mod helpers;
 								mod transform;
 								mod typed_chunk;
-												Make sure that the indexing Store only index searchable fields

											
										
										
											2020-11-03 13:42:29 +01:00
+								use std::collections::HashSet;
-												extract exact_word_prefix_docids

											
										
										
											2022-03-25 16:17:55 +01:00
+								use std::io::{Cursor, Read, Seek};
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
+								use std::iter::FromIterator;
-												Make the attribute positions range bounds to be fixed

											
										
										
											2021-03-24 15:06:54 +01:00
+								use std::num::{NonZeroU32, NonZeroUsize};
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
+								use crossbeam_channel::{Receiver, Sender};
-												extract exact_word_prefix_docids

											
										
										
											2022-03-25 16:17:55 +01:00
+								use heed::types::Str;
 								use heed::Database;
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								use log::debug;
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
+								use roaring::RoaringBitmap;
-												format the whole project

											
										
										
											2021-06-16 18:33:33 +02:00
+								use serde::{Deserialize, Serialize};
-												Compute the new, common and, deleted prefix words fst once

											
										
										
											2022-01-27 11:00:18 +01:00
+								use slice_group_by::GroupBy;
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
+								use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
-												Introduce the searchable parameter settings to the Settings update

											
										
										
											2020-11-03 13:20:11 +01:00
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
+								pub use self::helpers::{
-												Change the behavior of the as_cloneable_grenad by taking a ref

											
										
										
											2022-02-16 15:40:08 +01:00
+								    as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
 								    fst_stream_into_vec, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
-												Make sure that we do not generate too long keys

											
										
										
											2022-05-03 09:57:03 +02:00
+								    sorter_into_lmdb_database, valid_lmdb_key, write_into_lmdb_database, writer_into_reader,
 								    ClonableMmap, MergeFn,
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
+								};
-												extract exact_word_prefix_docids

											
										
										
											2022-03-25 16:17:55 +01:00
+								use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
+								pub use self::transform::{Transform, TransformOutput};
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								use crate::documents::DocumentBatchReader;
-												Bring the newly created word pair proximity docids

											
										
										
											2022-01-18 14:59:51 +01:00
+								pub use crate::update::index_documents::helpers::CursorClonableMmap;
-												format the whole project

											
										
										
											2021-06-16 18:33:33 +02:00
+								use crate::update::{
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								    self, Facets, IndexerConfig, UpdateIndexingStep, WordPrefixDocids,
 								    WordPrefixPairProximityDocids, WordPrefixPositionDocids, WordsPrefixesFst,
-												format the whole project

											
										
										
											2021-06-16 18:33:33 +02:00
+								};
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								use crate::{Index, Result, RoaringBitmapCodec, UserError};
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								static MERGED_DATABASE_COUNT: usize = 7;
 								static PREFIX_DATABASE_COUNT: usize = 5;
 								static TOTAL_POSTING_DATABASE_COUNT: usize = MERGED_DATABASE_COUNT + PREFIX_DATABASE_COUNT;
-												improve document addition returned metaimprove document addition
returned metaimprove document addition returned metaimprove document
addition returned metaimprove document addition returned metaimprove
document addition returned metaimprove document addition returned
metaimprove document addition returned meta

											
										
										
											2021-11-10 14:08:36 +01:00
+								#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-												return documents number on addition

											
										
										
											2020-12-30 18:43:50 +01:00
+								pub struct DocumentAdditionResult {
-												improve document addition returned metaimprove document addition
returned metaimprove document addition returned metaimprove document
addition returned metaimprove document addition returned metaimprove
document addition returned metaimprove document addition returned
metaimprove document addition returned meta

											
										
										
											2021-11-10 14:08:36 +01:00
+								    /// The number of documents that were indexed during the update
 								    pub indexed_documents: u64,
 								    /// The total number of documents in the index after the update
 								    pub number_of_documents: u64,
-												return documents number on addition

											
										
										
											2020-12-30 18:43:50 +01:00
+								}
-												derive serde for method and format

This is nicer when working with UpdateMeta struct

											
										
										
											2020-12-22 18:17:35 +01:00
+								#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
-												Update the Transform struct to support JSON updates

											
										
										
											2020-10-31 16:10:15 +01:00
+								#[non_exhaustive]
-												Move the IndexDocuments update into its own module

											
										
										
											2020-10-26 11:02:44 +01:00
+								pub enum IndexDocumentsMethod {
 								    /// Replace the previous document with the new one,
 								    /// removing all the already known attributes.
 								    ReplaceDocuments,
 								    /// Merge the previous version of the document with the new version,
 								    /// replacing old attributes values with the new ones and add the new attributes.
 								    UpdateDocuments,
 								}
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								impl Default for IndexDocumentsMethod {
 								    fn default() -> Self {
 								        Self::ReplaceDocuments
 								    }
 								}
 								pub struct IndexDocuments<'t, 'u, 'i, 'a, F> {
-												Update heed to 0.10.0

											
										
										
											2020-10-30 11:42:00 +01:00
+								    wtxn: &'t mut heed::RwTxn<'i, 'u>,
-												Move the IndexDocuments update into its own module

											
										
										
											2020-10-26 11:02:44 +01:00
+								    index: &'i Index,
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								    config: IndexDocumentsConfig,
 								    indexer_config: &'a IndexerConfig,
 								    transform: Option<Transform<'a, 'i>>,
 								    progress: F,
 								    added_documents: u64,
 								}
 								#[derive(Default, Debug, Clone)]
 								pub struct IndexDocumentsConfig {
 								    pub facet_level_group_size: Option<NonZeroUsize>,
 								    pub facet_min_level_size: Option<NonZeroUsize>,
 								    pub words_prefix_threshold: Option<u32>,
 								    pub max_prefix_length: Option<usize>,
 								    pub words_positions_level_group_size: Option<NonZeroU32>,
 								    pub words_positions_min_level_size: Option<NonZeroU32>,
 								    pub update_method: IndexDocumentsMethod,
 								    pub autogenerate_docids: bool,
-												Move the IndexDocuments update into its own module

											
										
										
											2020-10-26 11:02:44 +01:00
+								}
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								impl<'t, 'u, 'i, 'a, F> IndexDocuments<'t, 'u, 'i, 'a, F>
 								where
 								    F: Fn(UpdateIndexingStep) + Sync,
 								{
-												Use update_id in UpdateBuilder

Add the `update_id` to the updates. The rationale is the
following:
- It allows for better traceability of the update events, thus improved
  debugging and logging.
- The engine is now aware of what it has already processed, and can return
  it if asked. It may not make sense now, but in the future, the update
  store may not work the same way, and this information about the state
  of the engine will be desirable (distributed environment).

											
										
										
											2020-12-22 16:21:07 +01:00
+								    pub fn new(
 								        wtxn: &'t mut heed::RwTxn<'i, 'u>,
 								        index: &'i Index,
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        indexer_config: &'a IndexerConfig,
 								        config: IndexDocumentsConfig,
 								        progress: F,
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								    ) -> Result<IndexDocuments<'t, 'u, 'i, 'a, F>> {
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let transform = Some(Transform::new(
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            wtxn,
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								            &index,
 								            indexer_config,
 								            config.update_method,
 								            config.autogenerate_docids,
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        )?);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        Ok(IndexDocuments {
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								            transform,
 								            config,
 								            indexer_config,
 								            progress,
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
+								            wtxn,
 								            index,
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								            added_documents: 0,
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        })
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
+								    }
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								    /// Adds a batch of documents to the current builder.
 								    ///
 								    /// Since the documents are progressively added to the writer, a failure will cause a stale
 								    /// builder, and the builder must be discarded.
 								    ///
 								    /// Returns the number of documents added to the builder.
 								    pub fn add_documents<R>(&mut self, reader: DocumentBatchReader<R>) -> Result<u64>
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
+								    where
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        R: Read + Seek,
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
+								    {
-												meilisearch compatible primary key inference

											
										
										
											2021-05-06 21:16:40 +02:00
+								        // Early return when there is no document to add
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        if reader.is_empty() {
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								            return Ok(0);
-												early return on empty document addition

											
										
										
											2021-05-06 18:14:16 +02:00
+								        }
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let indexed_documents = self
 								            .transform
 								            .as_mut()
 								            .expect("Invalid document addition state")
 								            .read_documents(reader, self.wtxn, &self.progress)?
 								            as u64;
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        self.added_documents += indexed_documents;
-												return documents number on addition

											
										
										
											2020-12-30 18:43:50 +01:00
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        Ok(indexed_documents)
 								    }
-												Introduce the searchable parameter settings to the Settings update

											
										
										
											2020-11-03 13:20:11 +01:00
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								    #[logging_timer::time("IndexDocuments::{}")]
 								    pub fn execute(mut self) -> Result<DocumentAdditionResult> {
 								        if self.added_documents == 0 {
 								            let number_of_documents = self.index.number_of_documents(self.wtxn)?;
 								            return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });
 								        }
 								        let output = self
 								            .transform
 								            .take()
 								            .expect("Invalid document addition state")
 								            .output_from_sorter(self.wtxn, &self.progress)?;
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
 								        let new_facets = output.compute_real_facets(self.wtxn, self.index)?;
 								        self.index.put_faceted_fields(self.wtxn, &new_facets)?;
-												fix the searchable fields bug when a field is nested

Update milli/src/index.rs

Co-authored-by: Clément Renault <clement@meilisearch.com>

											
										
										
											2022-05-16 15:22:52 +02:00
+								        // in case new fields were introduced we're going to recreate the searchable fields.
 								        if let Some(faceted_fields) = self.index.user_defined_searchable_fields(self.wtxn)? {
 								            // we can't keep references on the faceted fields while we update the index thus we need to own it.
 								            let faceted_fields: Vec<String> =
 								                faceted_fields.into_iter().map(str::to_string).collect();
 								            self.index.put_all_searchable_fields_from_fields_ids_map(
 								                self.wtxn,
 								                &faceted_fields.iter().map(String::as_ref).collect::<Vec<_>>(),
 								                &output.fields_ids_map,
 								            )?;
 								        }
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let indexed_documents = output.documents_count as u64;
 								        let number_of_documents = self.execute_raw(output)?;
-												Introduce the searchable parameter settings to the Settings update

											
										
										
											2020-11-03 13:20:11 +01:00
-												improve document addition returned metaimprove document addition
returned metaimprove document addition returned metaimprove document
addition returned metaimprove document addition returned metaimprove
document addition returned metaimprove document addition returned
metaimprove document addition returned meta

											
										
										
											2021-11-10 14:08:36 +01:00
+								        Ok(DocumentAdditionResult { indexed_documents, number_of_documents })
 								    }
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
-												improve document addition returned metaimprove document addition
returned metaimprove document addition returned metaimprove document
addition returned metaimprove document addition returned metaimprove
document addition returned metaimprove document addition returned
metaimprove document addition returned meta

											
										
										
											2021-11-10 14:08:36 +01:00
+								    /// Returns the total number of documents in the index after the update.
-												Add logging timers

											
										
										
											2021-08-24 13:55:53 +02:00
+								    #[logging_timer::time("IndexDocuments::{}")]
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								    pub fn execute_raw(self, output: TransformOutput) -> Result<u64>
-												Introduce the searchable parameter settings to the Settings update

											
										
										
											2020-11-03 13:20:11 +01:00
+								    where
-												format the whole project

											
										
										
											2021-06-16 18:33:33 +02:00
+								        F: Fn(UpdateIndexingStep) + Sync,
-												Introduce the searchable parameter settings to the Settings update

											
										
										
											2020-11-03 13:20:11 +01:00
+								    {
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
+								        let TransformOutput {
-												Generate a uuid v4 based document id when missing

											
										
										
											2020-10-31 12:54:43 +01:00
+								            primary_key,
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
+								            fields_ids_map,
-												rename fields_distribution in field_distribution

											
										
										
											2021-06-17 15:16:20 +02:00
+								            field_distribution,
-												Rename the users ids documents ids into external documents ids

											
										
										
											2020-11-22 11:54:04 +01:00
+								            external_documents_ids,
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
+								            new_documents_ids,
 								            replaced_documents_ids,
 								            documents_count,
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            original_documents,
 								            flattened_documents,
-												Update the Transform struct to support JSON updates

											
										
										
											2020-10-31 16:10:15 +01:00
+								        } = output;
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
-												Introduce an empty FilterCondition variant to support unknown fields

											
										
										
											2021-07-27 16:24:21 +02:00
+								        // The fields_ids_map is put back to the store now so the rest of the transaction sees an
 								        // up to date field map.
 								        self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
+								        let backup_pool;
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let pool = match self.indexer_config.thread_pool {
 								            Some(ref pool) => pool,
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
+								            #[cfg(not(test))]
 								            None => {
 								                // We initialize a bakcup pool with the default
 								                // settings if none have already been set.
 								                backup_pool = rayon::ThreadPoolBuilder::new().build()?;
 								                &backup_pool
 								            }
 								            #[cfg(test)]
 								            None => {
 								                // We initialize a bakcup pool with the default
 								                // settings if none have already been set.
 								                backup_pool = rayon::ThreadPoolBuilder::new().num_threads(1).build()?;
 								                &backup_pool
 								            }
 								        };
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        let original_documents = grenad::Reader::new(original_documents)?;
 								        let flattened_documents = grenad::Reader::new(flattened_documents)?;
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
 								        // create LMDB writer channel
-												Remove unwrap sending errors in channel

											
										
										
											2021-08-24 13:01:31 +02:00
+								        let (lmdb_writer_sx, lmdb_writer_rx): (
 								            Sender<Result<TypedChunk>>,
 								            Receiver<Result<TypedChunk>>,
 								        ) = crossbeam_channel::unbounded();
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
-												improve the error handling in general and introduce the concept of reserved keywords

											
										
										
											2021-09-02 15:57:40 +02:00
+								        // get the primary key field id
-												edit the two lasts TODO comments

											
										
										
											2021-09-08 18:12:10 +02:00
+								        let primary_key_id = fields_ids_map.id(&primary_key).unwrap();
-												improve the error handling in general and introduce the concept of reserved keywords

											
										
										
											2021-09-02 15:57:40 +02:00
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
+								        // get searchable fields for word databases
 								        let searchable_fields =
 								            self.index.searchable_fields_ids(self.wtxn)?.map(HashSet::from_iter);
 								        // get filterable fields for facet databases
 								        let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        // get the fid of the `_geo.lat` and `_geo.lng` fields.
 								        let geo_fields_ids = match self.index.fields_ids_map(self.wtxn)?.id("_geo") {
-												Apply suggestions from code review

Co-authored-by: Clément Renault <clement@meilisearch.com>
											
										
										
											2021-09-09 12:20:08 +02:00
+								            Some(gfid) => {
 								                let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid);
 								                let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid);
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								                // if `_geo` is faceted then we get the `lat` and `lng`
-												Apply suggestions from code review

Co-authored-by: Clément Renault <clement@meilisearch.com>
											
										
										
											2021-09-09 12:20:08 +02:00
+								                if is_sortable || is_filterable {
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								                    let field_ids = self
 								                        .index
 								                        .fields_ids_map(self.wtxn)?
 								                        .insert("_geo.lat")
 								                        .zip(self.index.fields_ids_map(self.wtxn)?.insert("_geo.lng"))
 								                        .ok_or(UserError::AttributeLimitReached)?;
 								                    Some(field_ids)
-												Apply suggestions from code review

Co-authored-by: Clément Renault <clement@meilisearch.com>
											
										
										
											2021-09-09 12:20:08 +02:00
+								                } else {
 								                    None
 								                }
 								            }
 								            None => None,
-												only index _geo if it's set as sortable OR filterable

and only allow the filters if geo was set to filterable

											
										
										
											2021-08-30 15:47:33 +02:00
+								        };
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
-												Take stop word in account

											
										
										
											2021-08-17 12:25:07 +02:00
+								        let stop_words = self.index.stop_words(self.wtxn)?;
-												extract exact word docids

											
										
										
											2022-03-24 17:00:29 +01:00
+								        let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
-												Take stop word in account

											
										
										
											2021-08-17 12:25:07 +02:00
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
+								        // Run extraction pipeline in parallel.
 								        pool.install(|| {
 								            let params = GrenadParameters {
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								                chunk_compression_type: self.indexer_config.chunk_compression_type,
 								                chunk_compression_level: self.indexer_config.chunk_compression_level,
 								                max_memory: self.indexer_config.max_memory,
 								                max_nb_chunks: self.indexer_config.max_nb_chunks, // default value, may be chosen.
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
+								            };
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            // split obkv file into several chunks
 								            let original_chunk_iter = grenad_obkv_into_chunks(
 								                original_documents,
 								                params.clone(),
 								                self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4), // 4MiB
 								            );
 								            // split obkv file into several chunks
 								            let flattened_chunk_iter = grenad_obkv_into_chunks(
 								                flattened_documents,
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
+								                params.clone(),
-												Revert "Revert "Change chunk size to 4MiB to fit more the end user usage""

											
										
										
											2021-11-18 17:04:09 +01:00
+								                self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4), // 4MiB
-												Remove unwrap sending errors in channel

											
										
										
											2021-08-24 13:01:31 +02:00
+								            );
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            let result = original_chunk_iter
 								                .and_then(|original_chunk_iter| Ok((original_chunk_iter, flattened_chunk_iter?)))
 								                .map(|(original_chunk, flattened_chunk)| {
 								                    // extract all databases from the chunked obkv douments
 								                    extract::data_from_obkv_documents(
 								                        original_chunk,
 								                        flattened_chunk,
 								                        params,
 								                        lmdb_writer_sx.clone(),
 								                        searchable_fields,
 								                        faceted_fields,
 								                        primary_key_id,
 								                        geo_fields_ids,
 								                        stop_words,
 								                        self.indexer_config.max_positions_per_attributes,
 								                        exact_attributes,
 								                    )
 								                });
-												Remove unwrap sending errors in channel

											
										
										
											2021-08-24 13:01:31 +02:00
 								            if let Err(e) = result {
-												Ignore errors comming from crossbeam channel senders

											
										
										
											2021-08-26 11:01:30 +02:00
+								                let _ = lmdb_writer_sx.send(Err(e));
-												Remove unwrap sending errors in channel

											
										
										
											2021-08-24 13:01:31 +02:00
+								            }
 								            // needs to be droped to avoid channel waiting lock.
 								            drop(lmdb_writer_sx)
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
+								        });
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
+								        // We delete the documents that this document addition replaces. This way we are
 								        // able to simply insert all the documents even if they already exist in the database.
 								        if !replaced_documents_ids.is_empty() {
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								            let mut deletion_builder = update::DeleteDocuments::new(self.wtxn, self.index)?;
-												Introduce a little bit of debug when deleting documents

											
										
										
											2020-11-19 11:18:52 +01:00
+								            debug!("documents to delete {:?}", replaced_documents_ids);
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
+								            deletion_builder.delete_documents(&replaced_documents_ids);
-												Introduce a little bit of debug when deleting documents

											
										
										
											2020-11-19 11:18:52 +01:00
+								            let deleted_documents_count = deletion_builder.execute()?;
-												improve document addition returned metaimprove document addition
returned metaimprove document addition returned metaimprove document
addition returned metaimprove document addition returned metaimprove
document addition returned metaimprove document addition returned
metaimprove document addition returned meta

											
										
										
											2021-11-10 14:08:36 +01:00
+								            debug!("{} documents actually deleted", deleted_documents_count.deleted_documents);
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
+								        }
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
+								        let index_documents_ids = self.index.documents_ids(self.wtxn)?;
 								        let index_is_empty = index_documents_ids.len() == 0;
 								        let mut final_documents_ids = RoaringBitmap::new();
-												Remove a useless grenad file merging

											
										
										
											2022-02-28 10:14:54 +01:00
+								        let mut word_pair_proximity_docids = None;
 								        let mut word_position_docids = None;
 								        let mut word_docids = None;
-												query exact_word_docids in resolve_query_tree

											
										
										
											2022-03-24 19:25:11 +01:00
+								        let mut exact_word_docids = None;
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								        let mut databases_seen = 0;
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								            databases_seen,
 								            total_databases: TOTAL_POSTING_DATABASE_COUNT,
 								        });
-												Bring the newly created word pair proximity docids

											
										
										
											2022-01-18 14:59:51 +01:00
+								        for result in lmdb_writer_rx {
 								            let typed_chunk = match result? {
-												introduce exact_word_docids db

											
										
										
											2022-03-24 15:22:57 +01:00
+								                TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => {
 								                    let cloneable_chunk = unsafe { as_cloneable_grenad(&word_docids_reader)? };
-												Remove a useless grenad file merging

											
										
										
											2022-02-28 10:14:54 +01:00
+								                    word_docids = Some(cloneable_chunk);
-												introduce exact_word_docids db

											
										
										
											2022-03-24 15:22:57 +01:00
+								                    let cloneable_chunk =
 								                        unsafe { as_cloneable_grenad(&exact_word_docids_reader)? };
-												query exact_word_docids in resolve_query_tree

											
										
										
											2022-03-24 19:25:11 +01:00
+								                    exact_word_docids = Some(cloneable_chunk);
-												introduce exact_word_docids db

											
										
										
											2022-03-24 15:22:57 +01:00
+								                    TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader }
-												Rework the WordPrefixDocids update to compute a subset of the database

											
										
										
											2022-01-19 15:02:04 +01:00
+								                }
-												Bring the newly created word pair proximity docids

											
										
										
											2022-01-18 14:59:51 +01:00
+								                TypedChunk::WordPairProximityDocids(chunk) => {
-												Change the behavior of the as_cloneable_grenad by taking a ref

											
										
										
											2022-02-16 15:40:08 +01:00
+								                    let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
-												Remove a useless grenad file merging

											
										
										
											2022-02-28 10:14:54 +01:00
+								                    word_pair_proximity_docids = Some(cloneable_chunk);
-												Bring the newly created word pair proximity docids

											
										
										
											2022-01-18 14:59:51 +01:00
+								                    TypedChunk::WordPairProximityDocids(chunk)
 								                }
-												Rework the WordsPrefixPositionDocids update to compute a subset of the database

											
										
										
											2022-01-25 14:06:45 +01:00
+								                TypedChunk::WordPositionDocids(chunk) => {
-												Change the behavior of the as_cloneable_grenad by taking a ref

											
										
										
											2022-02-16 15:40:08 +01:00
+								                    let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
-												Remove a useless grenad file merging

											
										
										
											2022-02-28 10:14:54 +01:00
+								                    word_position_docids = Some(cloneable_chunk);
-												Rework the WordsPrefixPositionDocids update to compute a subset of the database

											
										
										
											2022-01-25 14:06:45 +01:00
+								                    TypedChunk::WordPositionDocids(chunk)
 								                }
-												Bring the newly created word pair proximity docids

											
										
										
											2022-01-18 14:59:51 +01:00
+								                otherwise => otherwise,
 								            };
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								            let (docids, is_merged_database) =
-												Bring the newly created word pair proximity docids

											
										
										
											2022-01-18 14:59:51 +01:00
+								                write_typed_chunk_into_index(typed_chunk, &self.index, self.wtxn, index_is_empty)?;
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								            if !docids.is_empty() {
 								                final_documents_ids |= docids;
 								                let documents_seen_count = final_documents_ids.len();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								                (self.progress)(UpdateIndexingStep::IndexDocuments {
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								                    documents_seen: documents_seen_count as usize,
 								                    total_documents: documents_count,
 								                });
 								                debug!(
 								                    "We have seen {} documents on {} total document so far",
 								                    documents_seen_count, documents_count
 								                );
 								            }
 								            if is_merged_database {
 								                databases_seen += 1;
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								                (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
-												improve document addition returned metaimprove document addition
returned metaimprove document addition returned metaimprove document
addition returned metaimprove document addition returned metaimprove
document addition returned metaimprove document addition returned
metaimprove document addition returned meta

											
										
										
											2021-11-10 14:08:36 +01:00
+								                    databases_seen,
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								                    total_databases: TOTAL_POSTING_DATABASE_COUNT,
 								                });
 								            }
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
+								        }
-												rename fields_distribution in field_distribution

											
										
										
											2021-06-17 15:16:20 +02:00
+								        // We write the field distribution into the main database
 								        self.index.put_field_distribution(self.wtxn, &field_distribution)?;
-												feat(index): store fields distribution in index

											
										
										
											2021-03-31 17:14:23 +02:00
-												Generate a uuid v4 based document id when missing

											
										
										
											2020-10-31 12:54:43 +01:00
+								        // We write the primary key field id into the main database
-												Fix settings bug

replace ids with str in settings

This allows for better maintainability of the settings code, since
updating the searchable attributes is now straightforward.

criterion use string

fix reindexing fieldid remaping

add tests for primary_key compute

fix tests

fix http-ui

fixup! add tests for primary_key compute

code improvements settings

update deps

fixup! code improvements settings

fixup! refactor settings updates and fix bug

fixup! Fix settings bug

fixup! Fix settings bug

fixup! Fix settings bug

Update src/update/index_documents/transform.rs

Co-authored-by: Clément Renault <clement@meilisearch.com>

fixup! Fix settings bug

											
										
										
											2021-01-20 17:27:43 +01:00
+								        self.index.put_primary_key(self.wtxn, &primary_key)?;
-												Generate a uuid v4 based document id when missing

											
										
										
											2020-10-31 12:54:43 +01:00
-												Rename the users ids documents ids into external documents ids

											
										
										
											2020-11-22 11:54:04 +01:00
+								        // We write the external documents ids into the main database.
 								        self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								        let all_documents_ids = index_documents_ids | new_documents_ids | replaced_documents_ids;
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
+								        self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;
-												Make the indexing process send the new progress step events

											
										
										
											2020-11-11 12:39:09 +01:00
-												Rework the WordsPrefixPositionDocids update to compute a subset of the database

											
										
										
											2022-01-25 14:06:45 +01:00
+								        self.execute_prefix_databases(
-												Replace the ugly unwraps by clean if let Somes

											
										
										
											2022-02-28 16:00:33 +01:00
+								            word_docids,
-												query exact_word_docids in resolve_query_tree

											
										
										
											2022-03-24 19:25:11 +01:00
+								            exact_word_docids,
-												Replace the ugly unwraps by clean if let Somes

											
										
										
											2022-02-28 16:00:33 +01:00
+								            word_pair_proximity_docids,
 								            word_position_docids,
-												Rework the WordsPrefixPositionDocids update to compute a subset of the database

											
										
										
											2022-01-25 14:06:45 +01:00
+								        )?;
-												improve document addition returned metaimprove document addition
returned metaimprove document addition returned metaimprove document
addition returned metaimprove document addition returned metaimprove
document addition returned metaimprove document addition returned
metaimprove document addition returned meta

											
										
										
											2021-11-10 14:08:36 +01:00
 								        Ok(all_documents_ids.len())
-												Plug new indexer

											
										
										
											2021-08-16 13:36:30 +02:00
+								    }
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
-												Add logging timers

											
										
										
											2021-08-24 13:55:53 +02:00
+								    #[logging_timer::time("IndexDocuments::{}")]
-												Bring the newly created word pair proximity docids

											
										
										
											2022-01-18 14:59:51 +01:00
+								    pub fn execute_prefix_databases(
 								        self,
-												Replace the ugly unwraps by clean if let Somes

											
										
										
											2022-02-28 16:00:33 +01:00
+								        word_docids: Option<grenad::Reader<CursorClonableMmap>>,
-												query exact_word_docids in resolve_query_tree

											
										
										
											2022-03-24 19:25:11 +01:00
+								        exact_word_docids: Option<grenad::Reader<CursorClonableMmap>>,
-												Replace the ugly unwraps by clean if let Somes

											
										
										
											2022-02-28 16:00:33 +01:00
+								        word_pair_proximity_docids: Option<grenad::Reader<CursorClonableMmap>>,
 								        word_position_docids: Option<grenad::Reader<CursorClonableMmap>>,
-												Bring the newly created word pair proximity docids

											
										
										
											2022-01-18 14:59:51 +01:00
+								    ) -> Result<()>
 								    where
 								        F: Fn(UpdateIndexingStep) + Sync,
 								    {
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								        // Merged databases have already been indexed, so we start from this count.
 								        let mut databases_seen = MERGED_DATABASE_COUNT;
-												Run the words prefixes update inside of the indexing documents update

											
										
										
											2021-02-10 11:53:13 +01:00
+								        // Run the facets update operation.
-												remove update_id in UpdateBuilder

											
										
										
											2021-11-03 13:12:01 +01:00
+								        let mut builder = Facets::new(self.wtxn, self.index);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
 								        builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
 								        if let Some(value) = self.config.facet_level_group_size {
-												Make the facet levels maps to previous level groups and don't split them

											
										
										
											2020-11-28 12:43:43 +01:00
+								            builder.level_group_size(value);
-												Introduce a new update for the facet levels

											
										
										
											2020-11-17 21:19:25 +01:00
+								        }
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        if let Some(value) = self.config.facet_min_level_size {
-												Make the facet levels maps to previous level groups and don't split them

											
										
										
											2020-11-28 12:43:43 +01:00
+								            builder.min_level_size(value);
-												Introduce a function to retrieve the facet level range docids

											
										
										
											2020-11-18 16:29:07 +01:00
+								        }
-												Introduce a new update for the facet levels

											
										
										
											2020-11-17 21:19:25 +01:00
+								        builder.execute()?;
-												Store the first word positions levels

											
										
										
											2021-03-11 17:23:46 +01:00
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								        databases_seen += 1;
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
 								            databases_seen,
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								            total_databases: TOTAL_POSTING_DATABASE_COUNT,
 								        });
-												Retrieve the previous version of the words prefixes FST

											
										
										
											2022-01-18 14:02:24 +01:00
+								        let previous_words_prefixes_fst =
 								            self.index.words_prefixes_fst(self.wtxn)?.map_data(|cow| cow.into_owned())?;
-												Run the words prefixes update inside of the indexing documents update

											
										
										
											2021-02-10 11:53:13 +01:00
+								        // Run the words prefixes update operation.
-												remove update_id in UpdateBuilder

											
										
										
											2021-11-03 13:12:01 +01:00
+								        let mut builder = WordsPrefixesFst::new(self.wtxn, self.index);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        if let Some(value) = self.config.words_prefix_threshold {
-												Run the words prefixes update inside of the indexing documents update

											
										
										
											2021-02-10 11:53:13 +01:00
+								            builder.threshold(value);
 								        }
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        if let Some(value) = self.config.max_prefix_length {
-												Run the words prefixes update inside of the indexing documents update

											
										
										
											2021-02-10 11:53:13 +01:00
+								            builder.max_prefix_length(value);
 								        }
 								        builder.execute()?;
-												Compute the new, common and, deleted prefix words fst once

											
										
										
											2022-01-27 11:00:18 +01:00
+								        let current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
 								        // We retrieve the common words between the previous and new prefix word fst.
 								        let common_prefix_fst_words = fst_stream_into_vec(
 								            previous_words_prefixes_fst.op().add(&current_prefix_fst).intersection(),
 								        );
 								        let common_prefix_fst_words: Vec<_> = common_prefix_fst_words
 								            .as_slice()
 								            .linear_group_by_key(|x| x.chars().nth(0).unwrap())
 								            .collect();
 								        // We retrieve the newly added words between the previous and new prefix word fst.
 								        let new_prefix_fst_words = fst_stream_into_vec(
 								            current_prefix_fst.op().add(&previous_words_prefixes_fst).difference(),
 								        );
 								        // We compute the set of prefixes that are no longer part of the prefix fst.
 								        let del_prefix_fst_words = fst_stream_into_hashset(
 								            previous_words_prefixes_fst.op().add(&current_prefix_fst).difference(),
 								        );
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								        databases_seen += 1;
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
 								            databases_seen,
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								            total_databases: TOTAL_POSTING_DATABASE_COUNT,
 								        });
-												Replace the ugly unwraps by clean if let Somes

											
										
										
											2022-02-28 16:00:33 +01:00
+								        if let Some(word_docids) = word_docids {
-												extract exact_word_prefix_docids

											
										
										
											2022-03-25 16:17:55 +01:00
+								            execute_word_prefix_docids(
-												refactor WordPrefixDocids to take dbs instead of indexes

											
										
										
											2022-03-25 10:20:39 +01:00
+								                self.wtxn,
-												extract exact_word_prefix_docids

											
										
										
											2022-03-25 16:17:55 +01:00
+								                word_docids,
-												refactor WordPrefixDocids to take dbs instead of indexes

											
										
										
											2022-03-25 10:20:39 +01:00
+								                self.index.word_docids.clone(),
 								                self.index.word_prefix_docids.clone(),
-												extract exact_word_prefix_docids

											
										
										
											2022-03-25 16:17:55 +01:00
+								                &self.indexer_config,
 								                &new_prefix_fst_words,
 								                &common_prefix_fst_words,
 								                &del_prefix_fst_words,
 								            )?;
 								        }
 								        if let Some(exact_word_docids) = exact_word_docids {
 								            execute_word_prefix_docids(
 								                self.wtxn,
 								                exact_word_docids,
 								                self.index.exact_word_docids.clone(),
 								                self.index.exact_word_prefix_docids.clone(),
 								                &self.indexer_config,
-												Replace the ugly unwraps by clean if let Somes

											
										
										
											2022-02-28 16:00:33 +01:00
+								                &new_prefix_fst_words,
 								                &common_prefix_fst_words,
 								                &del_prefix_fst_words,
 								            )?;
 								        }
-												Compute the words prefixes at the end of an update

											
										
										
											2021-03-25 11:10:12 +01:00
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								        databases_seen += 1;
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
 								            databases_seen,
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								            total_databases: TOTAL_POSTING_DATABASE_COUNT,
 								        });
-												Replace the ugly unwraps by clean if let Somes

											
										
										
											2022-02-28 16:00:33 +01:00
+								        if let Some(word_pair_proximity_docids) = word_pair_proximity_docids {
 								            // Run the word prefix pair proximity docids update operation.
 								            let mut builder = WordPrefixPairProximityDocids::new(self.wtxn, self.index);
 								            builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
 								            builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
 								            builder.max_nb_chunks = self.indexer_config.max_nb_chunks;
 								            builder.max_memory = self.indexer_config.max_memory;
 								            builder.execute(
 								                word_pair_proximity_docids,
 								                &new_prefix_fst_words,
 								                &common_prefix_fst_words,
 								                &del_prefix_fst_words,
 								            )?;
 								        }
-												Compute the words prefixes at the end of an update

											
										
										
											2021-03-25 11:10:12 +01:00
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								        databases_seen += 1;
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
 								            databases_seen,
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								            total_databases: TOTAL_POSTING_DATABASE_COUNT,
 								        });
-												Replace the ugly unwraps by clean if let Somes

											
										
										
											2022-02-28 16:00:33 +01:00
+								        if let Some(word_position_docids) = word_position_docids {
 								            // Run the words prefix position docids update operation.
 								            let mut builder = WordPrefixPositionDocids::new(self.wtxn, self.index);
 								            builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
 								            builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
 								            builder.max_nb_chunks = self.indexer_config.max_nb_chunks;
 								            builder.max_memory = self.indexer_config.max_memory;
 								            if let Some(value) = self.config.words_positions_level_group_size {
 								                builder.level_group_size(value);
 								            }
 								            if let Some(value) = self.config.words_positions_min_level_size {
 								                builder.min_level_size(value);
 								            }
 								            builder.execute(
 								                word_position_docids,
 								                &new_prefix_fst_words,
 								                &common_prefix_fst_words,
 								                &del_prefix_fst_words,
 								            )?;
-												Expose and use the WordsLevelPositions update

											
										
										
											2021-03-17 13:55:24 +01:00
+								        }
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								        databases_seen += 1;
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
 								            databases_seen,
-												Fix test and use progress callback

											
										
										
											2021-08-17 10:56:06 +02:00
+								            total_databases: TOTAL_POSTING_DATABASE_COUNT,
 								        });
-												Introduce the UpdateBuilder and use it in the HTTP routes

											
										
										
											2020-10-26 20:18:10 +01:00
+								        Ok(())
-												Move the IndexDocuments update into its own module

											
										
										
											2020-10-26 11:02:44 +01:00
+								    }
 								}
-												Fix a documents indexing bug and add a test

											
										
										
											2020-10-30 12:14:25 +01:00
-												extract exact_word_prefix_docids

											
										
										
											2022-03-25 16:17:55 +01:00
+								/// Run the word prefix docids update operation.
 								fn execute_word_prefix_docids(
 								    txn: &mut heed::RwTxn,
 								    reader: grenad::Reader<Cursor<ClonableMmap>>,
 								    word_docids_db: Database<Str, RoaringBitmapCodec>,
 								    word_prefix_docids_db: Database<Str, RoaringBitmapCodec>,
 								    indexer_config: &IndexerConfig,
 								    new_prefix_fst_words: &[String],
 								    common_prefix_fst_words: &[&[String]],
 								    del_prefix_fst_words: &HashSet<Vec<u8>>,
 								) -> Result<()> {
 								    // Turn the grenad reader into a cursor first: this is the only fallible
 								    // step before the update itself, so it fails before the builder is set up.
 								    let docids_cursor = reader.into_cursor()?;
 								
 								    // Build the prefix update over the given pair of databases and copy the
 								    // chunk compression and memory limits over from the indexer configuration.
 								    let mut prefix_update = WordPrefixDocids::new(txn, word_docids_db, word_prefix_docids_db);
 								    prefix_update.chunk_compression_type = indexer_config.chunk_compression_type;
 								    prefix_update.chunk_compression_level = indexer_config.chunk_compression_level;
 								    prefix_update.max_nb_chunks = indexer_config.max_nb_chunks;
 								    prefix_update.max_memory = indexer_config.max_memory;
 								
 								    // The three prefix sets are already references, so they are forwarded as-is.
 								    prefix_update.execute(
 								        docids_cursor,
 								        new_prefix_fst_words,
 								        common_prefix_fst_words,
 								        del_prefix_fst_words,
 								    )?;
 								
 								    Ok(())
 								}
-												Fix a documents indexing bug and add a test

											
										
										
											2020-10-30 12:14:25 +01:00
+								#[cfg(test)]
 								mod tests {
-												Add a test for the words level positions generation bug

											
										
										
											2021-06-23 18:35:44 +02:00
+								    use std::io::Cursor;
-												Add a test to check that we can index more that 256 fields

											
										
										
											2021-07-06 11:40:45 +02:00
+								    use big_s::S;
-												Fix a documents indexing bug and add a test

											
										
										
											2020-10-30 12:14:25 +01:00
+								    use heed::EnvOpenOptions;
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								    use maplit::hashset;
-												Fix a documents indexing bug and add a test

											
										
										
											2020-10-30 12:14:25 +01:00
-												format the whole project

											
										
										
											2021-06-16 18:33:33 +02:00
+								    use super::*;
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								    use crate::documents::DocumentBatchBuilder;
-												Add a test for when we insert a previously deleted document

											
										
										
											2021-06-30 11:23:29 +02:00
+								    use crate::update::DeleteDocuments;
-												Add a test to check that we can index more that 256 fields

											
										
										
											2021-07-06 11:40:45 +02:00
+								    use crate::HashMap;
-												format the whole project

											
										
										
											2021-06-16 18:33:33 +02:00
-												Fix a documents indexing bug and add a test

											
										
										
											2020-10-30 12:14:25 +01:00
+								    #[test]
-												Add a test to check that merging works correctly with CSVs

											
										
										
											2020-10-30 13:46:56 +01:00
+								    fn simple_document_replacement() {
-												Fix a documents indexing bug and add a test

											
										
										
											2020-10-30 12:14:25 +01:00
+								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        // First we send 3 documents with ids from 1 to 3.
 								        let mut wtxn = index.write_txn().unwrap();
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([
 								            { "id": 1, "name": "kevin" },
 								            { "id": 2, "name": "kevina" },
 								            { "id": 3, "name": "benoit" }
 								        ]);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
 								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
 								        let mut builder =
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
 								                .unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Fix a documents indexing bug and add a test

											
										
										
											2020-10-30 12:14:25 +01:00
+								        wtxn.commit().unwrap();
 								        // Check that there are 3 documents now.
 								        let rtxn = index.read_txn().unwrap();
 								        let count = index.number_of_documents(&rtxn).unwrap();
 								        assert_eq!(count, 3);
 								        drop(rtxn);
 								        // Second we send 1 document with id 1, to erase the previous ones.
 								        let mut wtxn = index.write_txn().unwrap();
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([ { "id": 1, "name": "updated kevin" } ]);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let mut builder =
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
 								                .unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Fix a documents indexing bug and add a test

											
										
										
											2020-10-30 12:14:25 +01:00
+								        wtxn.commit().unwrap();
-												Add a test to check that merging works correctly with CSVs

											
										
										
											2020-10-30 13:46:56 +01:00
+								        // Check that there are **still** 3 documents.
-												Fix a documents indexing bug and add a test

											
										
										
											2020-10-30 12:14:25 +01:00
+								        let rtxn = index.read_txn().unwrap();
 								        let count = index.number_of_documents(&rtxn).unwrap();
 								        assert_eq!(count, 3);
 								        drop(rtxn);
 								        // Third we send 3 documents again to replace the existing ones.
 								        let mut wtxn = index.write_txn().unwrap();
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([
 								            { "id": 1, "name": "updated second kevin" },
 								            { "id": 2, "name": "updated kevina" },
 								            { "id": 3, "name": "updated benoit" }
 								        ]);
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        let mut builder =
 								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
-												Fix a documents indexing bug and add a test

											
										
										
											2020-10-30 12:14:25 +01:00
+								        wtxn.commit().unwrap();
-												Add a test to check that merging works correctly with CSVs

											
										
										
											2020-10-30 13:46:56 +01:00
+								        // Check that there are **still** 3 documents.
-												Fix a documents indexing bug and add a test

											
										
										
											2020-10-30 12:14:25 +01:00
+								        let rtxn = index.read_txn().unwrap();
 								        let count = index.number_of_documents(&rtxn).unwrap();
 								        assert_eq!(count, 3);
 								        drop(rtxn);
 								    }
-												Add a test to check that merging works correctly with CSVs

											
										
										
											2020-10-30 13:46:56 +01:00
 								    #[test]
 								    fn simple_document_merge() {
 								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        // First we send 3 documents with duplicate ids and
 								        // change the index method to merge documents.
 								        let mut wtxn = index.write_txn().unwrap();
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([
 								            { "id": 1, "name": "kevin" },
 								            { "id": 1, "name": "kevina" },
 								            { "id": 1, "name": "benoit" }
 								        ]);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig {
 								            update_method: IndexDocumentsMethod::UpdateDocuments,
 								            ..Default::default()
 								        };
 								        let mut builder =
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
 								                .unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Add a test to check that merging works correctly with CSVs

											
										
										
											2020-10-30 13:46:56 +01:00
+								        wtxn.commit().unwrap();
 								        // Check that there is only 1 document now.
 								        let rtxn = index.read_txn().unwrap();
 								        let count = index.number_of_documents(&rtxn).unwrap();
 								        assert_eq!(count, 1);
 								        // Check that we get only one document from the database.
 								        let docs = index.documents(&rtxn, Some(0)).unwrap();
 								        assert_eq!(docs.len(), 1);
 								        let (id, doc) = docs[0];
 								        assert_eq!(id, 0);
 								        // Check that this document is equal to the last one sent.
 								        let mut doc_iter = doc.iter();
-												stop casting integer docids to string

											
										
										
											2021-09-28 18:35:54 +02:00
+								        assert_eq!(doc_iter.next(), Some((0, &b"1"[..])));
-												Add a test to check that merging works correctly with CSVs

											
										
										
											2020-10-30 13:46:56 +01:00
+								        assert_eq!(doc_iter.next(), Some((1, &br#""benoit""#[..])));
 								        assert_eq!(doc_iter.next(), None);
 								        drop(rtxn);
 								        // Second we send 1 document with id 1, to force it to be merged with the previous one.
 								        let mut wtxn = index.write_txn().unwrap();
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([ { "id": 1, "age": 25 } ]);
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        let mut builder =
 								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Add a test to check that merging works correctly with CSVs

											
										
										
											2020-10-30 13:46:56 +01:00
+								        wtxn.commit().unwrap();
 								        // Check that there is **always** 1 document.
 								        let rtxn = index.read_txn().unwrap();
 								        let count = index.number_of_documents(&rtxn).unwrap();
 								        assert_eq!(count, 1);
 								        // Check that we get only one document from the database.
-												Apply review suggestions

Co-authored-by: Clément Renault <clement@meilisearch.com>

											
										
										
											2022-06-29 06:44:16 +02:00
+								        // Since the document has been deleted and re-inserted, its internal docid has been incremented to 1
-												Fasten the document deletion

When a document deletion occurs, instead of deleting the document we mark it as deleted
in the new “soft deleted” bitmap. It is then removed from the search, and all the other
endpoints.

											
										
										
											2022-06-13 17:59:34 +02:00
+								        let docs = index.documents(&rtxn, Some(1)).unwrap();
-												Add a test to check that merging works correctly with CSVs

											
										
										
											2020-10-30 13:46:56 +01:00
+								        assert_eq!(docs.len(), 1);
 								        let (id, doc) = docs[0];
-												Fasten the document deletion

When a document deletion occurs, instead of deleting the document we mark it as deleted
in the new “soft deleted” bitmap. It is then removed from the search, and all the other
endpoints.

											
										
										
											2022-06-13 17:59:34 +02:00
+								        assert_eq!(id, 1);
-												Add a test to check that merging works correctly with CSVs

											
										
										
											2020-10-30 13:46:56 +01:00
 								        // Check that this document is equal to the last one sent.
 								        let mut doc_iter = doc.iter();
-												stop casting integer docids to string

											
										
										
											2021-09-28 18:35:54 +02:00
+								        assert_eq!(doc_iter.next(), Some((0, &b"1"[..])));
-												Add a test to check that merging works correctly with CSVs

											
										
										
											2020-10-30 13:46:56 +01:00
+								        assert_eq!(doc_iter.next(), Some((1, &br#""benoit""#[..])));
-												stop casting integer docids to string

											
										
										
											2021-09-28 18:35:54 +02:00
+								        assert_eq!(doc_iter.next(), Some((2, &b"25"[..])));
-												Add a test to check that merging works correctly with CSVs

											
										
										
											2020-10-30 13:46:56 +01:00
+								        assert_eq!(doc_iter.next(), None);
 								        drop(rtxn);
 								    }
-												Generate a uuid v4 based document id when missing

											
										
										
											2020-10-31 12:54:43 +01:00
-												Introduce a parameter to disable the engine to autogenerate docids

											
										
										
											2020-10-31 21:46:55 +01:00
+								    #[test]
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								    fn not_auto_generated_documents_ids() {
-												Introduce a parameter to disable the engine to autogenerate docids

											
										
										
											2020-10-31 21:46:55 +01:00
+								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        // First we send 3 documents without any id.
 								        let mut wtxn = index.write_txn().unwrap();
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([
-												Introduce a parameter to disable the engine to autogenerate docids

											
										
										
											2020-10-31 21:46:55 +01:00
+								            { "name": "kevin" },
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								            { "name": "kevina" },
-												Introduce a parameter to disable the engine to autogenerate docids

											
										
										
											2020-10-31 21:46:55 +01:00
+								            { "name": "benoit" }
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        ]);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        let mut builder =
 								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        assert!(builder.add_documents(content).is_err());
-												Introduce a parameter to disable the engine to autogenerate docids

											
										
										
											2020-10-31 21:46:55 +01:00
+								        wtxn.commit().unwrap();
 								        // Check that there is no document.
 								        let rtxn = index.read_txn().unwrap();
 								        let count = index.number_of_documents(&rtxn).unwrap();
 								        assert_eq!(count, 0);
 								        drop(rtxn);
 								    }
-												Generate a uuid v4 based document id when missing

											
										
										
											2020-10-31 12:54:43 +01:00
+								    #[test]
 								    fn simple_auto_generated_documents_ids() {
 								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        // First we send 3 documents without ids, relying on docid autogeneration.
 								        let mut wtxn = index.write_txn().unwrap();
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([
 								            { "name": "kevin" },
 								            { "name": "kevina" },
 								            { "name": "benoit" }
 								        ]);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let config = IndexerConfig::default();
 								        let indexing_config =
 								            IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
 								        let mut builder =
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
 								                .unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Generate a uuid v4 based document id when missing

											
										
										
											2020-10-31 12:54:43 +01:00
+								        wtxn.commit().unwrap();
 								        // Check that there are 3 documents now.
 								        let rtxn = index.read_txn().unwrap();
 								        let count = index.number_of_documents(&rtxn).unwrap();
 								        assert_eq!(count, 3);
 								        let docs = index.documents(&rtxn, vec![0, 1, 2]).unwrap();
 								        let (_id, obkv) = docs.iter().find(|(_id, kv)| kv.get(0) == Some(br#""kevin""#)).unwrap();
 								        let kevin_uuid: String = serde_json::from_slice(&obkv.get(1).unwrap()).unwrap();
 								        drop(rtxn);
 								        // Second we send 1 document with the generated uuid, to replace the previous one.
 								        let mut wtxn = index.write_txn().unwrap();
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([ { "name": "updated kevin", "id": kevin_uuid } ]);
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        let mut builder =
 								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Generate a uuid v4 based document id when missing

											
										
										
											2020-10-31 12:54:43 +01:00
+								        wtxn.commit().unwrap();
 								        // Check that there are **still** 3 documents.
 								        let rtxn = index.read_txn().unwrap();
 								        let count = index.number_of_documents(&rtxn).unwrap();
 								        assert_eq!(count, 3);
-												Fix a bug where generated docids were not saved when indexing JSON docs

											
										
										
											2020-11-01 12:14:44 +01:00
-												Fasten the document deletion

When a document deletion occurs, instead of deleting the document we mark it as deleted
in the new “soft deleted” bitmap. It is then removed from the search, and all the other
endpoints.

											
										
										
											2022-06-13 17:59:34 +02:00
+								        // the document 0 has been deleted and reinserted with the id 3
 								        let docs = index.documents(&rtxn, vec![1, 2, 3]).unwrap();
 								        let kevin_position =
 								            docs.iter().position(|(_, d)| d.get(0).unwrap() == br#""updated kevin""#).unwrap();
 								        assert_eq!(kevin_position, 2);
 								        let (_, doc) = docs[kevin_position];
-												Fix a bug where generated docids were not saved when indexing JSON docs

											
										
										
											2020-11-01 12:14:44 +01:00
 								        // Check that this document is equal to the last
 								        // one sent and that an UUID has been generated.
-												Validate documents ids before accepting them

											
										
										
											2020-11-01 16:43:12 +01:00
+								        assert_eq!(doc.get(0), Some(&br#""updated kevin""#[..]));
-												Fix a bug where generated docids were not saved when indexing JSON docs

											
										
										
											2020-11-01 12:14:44 +01:00
+								        // This is an UUID, it must be 36 bytes long plus the 2 surrounding string quotes (").
-												feat(index): introduce fields_ids_distribution

											
										
										
											2021-03-31 17:14:23 +02:00
+								        assert_eq!(doc.get(1).unwrap().len(), 36 + 2);
-												Generate a uuid v4 based document id when missing

											
										
										
											2020-10-31 12:54:43 +01:00
+								        drop(rtxn);
 								    }
 								    #[test]
 								    fn reordered_auto_generated_documents_ids() {
 								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        // First we send 3 documents with ids from 1 to 3.
 								        let mut wtxn = index.write_txn().unwrap();
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([
 								            { "id": 1, "name": "kevin" },
 								            { "id": 2, "name": "kevina" },
 								            { "id": 3, "name": "benoit" }
 								        ]);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        let mut builder =
 								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Generate a uuid v4 based document id when missing

											
										
										
											2020-10-31 12:54:43 +01:00
+								        wtxn.commit().unwrap();
 								        // Check that there are 3 documents now.
 								        let rtxn = index.read_txn().unwrap();
 								        let count = index.number_of_documents(&rtxn).unwrap();
 								        assert_eq!(count, 3);
 								        drop(rtxn);
 								        // Second we send 1 document without specifying the id.
 								        let mut wtxn = index.write_txn().unwrap();
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([ { "name": "new kevin" } ]);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let indexing_config =
 								            IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        let mut builder =
 								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Generate a uuid v4 based document id when missing

											
										
										
											2020-10-31 12:54:43 +01:00
+								        wtxn.commit().unwrap();
 								        // Check that there are 4 documents now.
 								        let rtxn = index.read_txn().unwrap();
 								        let count = index.number_of_documents(&rtxn).unwrap();
 								        assert_eq!(count, 4);
 								        drop(rtxn);
 								    }
-												Update the Transform struct to support JSON updates

											
										
										
											2020-10-31 16:10:15 +01:00
 								    #[test]
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								    fn empty_update() {
-												Update the Transform struct to support JSON updates

											
										
										
											2020-10-31 16:10:15 +01:00
+								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        // First we send an empty batch containing 0 documents.
 								        let mut wtxn = index.write_txn().unwrap();
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([]);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        let mut builder =
 								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Update the Transform struct to support JSON updates

											
										
										
											2020-10-31 16:10:15 +01:00
+								        wtxn.commit().unwrap();
 								        // Check that there are no documents.
 								        let rtxn = index.read_txn().unwrap();
 								        let count = index.number_of_documents(&rtxn).unwrap();
 								        assert_eq!(count, 0);
 								        drop(rtxn);
 								    }
-												Update the Transform struct to support JSON stream updates

											
										
										
											2020-11-01 11:50:10 +01:00
-												Validate documents ids before accepting them

											
										
										
											2020-11-01 16:43:12 +01:00
+								    #[test]
 								    fn invalid_documents_ids() {
 								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        // First we send 1 document with an invalid id.
 								        let mut wtxn = index.write_txn().unwrap();
 								        // There is a space in the document id.
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([ { "id": "brume bleue", "name": "kevin" } ]);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
 								        let mut builder =
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
 								                .unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        assert!(builder.add_documents(content).is_err());
-												Validate documents ids before accepting them

											
										
										
											2020-11-01 16:43:12 +01:00
+								        wtxn.commit().unwrap();
 								        // Second we send 1 document with a valid id.
 								        let mut wtxn = index.write_txn().unwrap();
 								        // The document id is a plain integer, which is accepted.
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([ { "id": 32, "name": "kevin" } ]);
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        let mut builder =
 								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Validate documents ids before accepting them

											
										
										
											2020-11-01 16:43:12 +01:00
+								        wtxn.commit().unwrap();
 								        // Check that there is 1 document now.
 								        let rtxn = index.read_txn().unwrap();
 								        let count = index.number_of_documents(&rtxn).unwrap();
 								        assert_eq!(count, 1);
 								        drop(rtxn);
 								    }
-												Make sure we index all kind of JSON types

											
										
										
											2020-11-06 16:15:07 +01:00
 								    #[test]
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								    fn complex_documents() {
-												Make sure we index all kind of JSON types

											
										
										
											2020-11-06 16:15:07 +01:00
+								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        // First we send 3 documents, each with its own id and a different kind of nested JSON value.
 								        let mut wtxn = index.write_txn().unwrap();
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([
-												Make sure we index all kind of JSON types

											
										
										
											2020-11-06 16:15:07 +01:00
+								            { "id": 0, "name": "kevin", "object": { "key1": "value1", "key2": "value2" } },
 								            { "id": 1, "name": "kevina", "array": ["I", "am", "fine"] },
 								            { "id": 2, "name": "benoit", "array_of_object": [{ "wow": "amazing" }] }
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        ]);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        let mut builder =
 								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Make sure we index all kind of JSON types

											
										
										
											2020-11-06 16:15:07 +01:00
+								        wtxn.commit().unwrap();
 								        // Check that the nested values of the 3 documents can be searched.
 								        let rtxn = index.read_txn().unwrap();
 								        // Search for a sub object value
 								        let result = index.search(&rtxn).query(r#""value2""#).execute().unwrap();
 								        assert_eq!(result.documents_ids, vec![0]);
 								        // Search for a sub array value
 								        let result = index.search(&rtxn).query(r#""fine""#).execute().unwrap();
 								        assert_eq!(result.documents_ids, vec![1]);
 								        // Search for a sub array sub object value
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        let result = index.search(&rtxn).query(r#""amazing""#).execute().unwrap();
-												Make sure we index all kind of JSON types

											
										
										
											2020-11-06 16:15:07 +01:00
+								        assert_eq!(result.documents_ids, vec![2]);
 								        drop(rtxn);
 								    }
-												Add a test for the words level positions generation bug

											
										
										
											2021-06-23 18:35:44 +02:00
 								    #[test]
 								    fn simple_documents_replace() {
 								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        // First we send 6 documents, each of them with an id.
 								        let mut wtxn = index.write_txn().unwrap();
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let documents = documents!([
-												add a tests for the indexation of the geosearch

											
										
										
											2021-08-26 13:27:32 +02:00
+								          { "id": 2,    "title": "Pride and Prejudice",                    "author": "Jane Austin",              "genre": "romance",    "price": 3.5, "_geo": { "lat": 12, "lng": 42 } },
-												Add a test for the words level positions generation bug

											
										
										
											2021-06-23 18:35:44 +02:00
+								          { "id": 456,  "title": "Le Petit Prince",                        "author": "Antoine de Saint-Exupéry", "genre": "adventure" , "price": 10.0 },
 								          { "id": 1,    "title": "Alice In Wonderland",                    "author": "Lewis Carroll",            "genre": "fantasy",    "price": 25.99 },
 								          { "id": 1344, "title": "The Hobbit",                             "author": "J. R. R. Tolkien",         "genre": "fantasy" },
 								          { "id": 4,    "title": "Harry Potter and the Half-Blood Prince", "author": "J. K. Rowling",            "genre": "fantasy" },
-												add a tests for the indexation of the geosearch

											
										
										
											2021-08-26 13:27:32 +02:00
+								          { "id": 42,   "title": "The Hitchhiker's Guide to the Galaxy",   "author": "Douglas Adams", "_geo": { "lat": 35, "lng": 23 } }
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        ]);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig {
 								            update_method: IndexDocumentsMethod::ReplaceDocuments,
 								            ..Default::default()
 								        };
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        let mut builder =
 								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(documents).unwrap();
 								        builder.execute().unwrap();
-												Add a test for the words level positions generation bug

											
										
										
											2021-06-23 18:35:44 +02:00
+								        wtxn.commit().unwrap();
 								        let mut wtxn = index.write_txn().unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let indexing_config = IndexDocumentsConfig {
 								            update_method: IndexDocumentsMethod::UpdateDocuments,
 								            ..Default::default()
 								        };
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								        let mut builder =
 								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let documents = documents!([
-												Add a test for the words level positions generation bug

											
										
										
											2021-06-23 18:35:44 +02:00
+								          {
 								            "id": 2,
 								            "author": "J. Austen",
 								            "date": "1813"
 								          }
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        ]);
-												Add a test for the words level positions generation bug

											
										
										
											2021-06-23 18:35:44 +02:00
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(documents).unwrap();
 								        builder.execute().unwrap();
-												Add a test for the words level positions generation bug

											
										
										
											2021-06-23 18:35:44 +02:00
+								        wtxn.commit().unwrap();
 								    }
-												Add a test for when we insert a previously deleted document

											
										
										
											2021-06-30 11:23:29 +02:00
-												fix the mixed dataset geosearch indexing bug

											
										
										
											2022-05-16 15:55:18 +02:00
+								    #[test]
 								    fn mixed_geo_documents() {
 								        // Open a fresh index inside a temporary directory.
 								        let tmp_dir = tempfile::tempdir().unwrap();
 								        let mut env_options = EnvOpenOptions::new();
 								        env_options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(env_options, &tmp_dir).unwrap();
 								        // Index a batch of 6 documents in which only the first and the last one
 								        // carry a `_geo` field; the four in between have nothing but an id.
 								        let mut wtxn = index.write_txn().unwrap();
 								        let batch = documents!([
 								          { "id": 2, "price": 3.5, "_geo": { "lat": 12, "lng": 42 } },
 								          { "id": 456 },
 								          { "id": 1 },
 								          { "id": 1344 },
 								          { "id": 4 },
 								          { "id": 42, "_geo": { "lat": 35, "lng": 23 } }
 								        ]);
 								        let config = IndexerConfig::default();
 								        let replace_config = IndexDocumentsConfig {
 								            update_method: IndexDocumentsMethod::ReplaceDocuments,
 								            ..Default::default()
 								        };
 								        let mut indexer =
 								            IndexDocuments::new(&mut wtxn, &index, &config, replace_config, |_| ()).unwrap();
 								        indexer.add_documents(batch).unwrap();
 								        indexer.execute().unwrap();
 								        wtxn.commit().unwrap();
 								        // In a second transaction, declare `_geo` as filterable. The settings
 								        // update has to go through without error on this mixed dataset.
 								        let mut wtxn = index.write_txn().unwrap();
 								        let mut settings = update::Settings::new(&mut wtxn, &index, &config);
 								        settings.set_filterable_fields(hashset!(S("_geo")));
 								        settings.execute(|_| ()).unwrap();
 								        wtxn.commit().unwrap();
 								    }
-												improve geosearch error messages

											
										
										
											2022-05-02 19:19:50 +02:00
+								    #[test]
 								    fn index_all_flavour_of_geo() {
 								        // Checks that every accepted way of writing `_geo` coordinates gets indexed
 								        // and can then be found with a `_geoRadius` filter.
 								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        let config = IndexerConfig::default();
 								        // Declare `_geo` as filterable before any document is sent.
 								        let mut wtxn = index.write_txn().unwrap();
 								        let mut builder = update::Settings::new(&mut wtxn, &index, &config);
 								        builder.set_filterable_fields(hashset!(S("_geo")));
 								        builder.execute(|_| ()).unwrap();
 								        wtxn.commit().unwrap();
 								        let indexing_config = IndexDocumentsConfig {
 								            update_method: IndexDocumentsMethod::ReplaceDocuments,
 								            ..Default::default()
 								        };
 								        let mut wtxn = index.write_txn().unwrap();
 								        // The four documents describe the same point (lat 31, lng 42) in different
 								        // ways: nested object or dotted `_geo.lat` / `_geo.lng` keys, numbers or
 								        // numeric strings, and a single-element array.
 								        let documents = documents!([
 								          { "id": 0, "_geo": { "lat": 31, "lng": [42] } },
 								          { "id": 1, "_geo": { "lat": "31" }, "_geo.lng": 42 },
 								          { "id": 2, "_geo": { "lng": "42" }, "_geo.lat": "31" },
 								          { "id": 3, "_geo.lat": 31, "_geo.lng": "42" },
 								        ]);
 								        // The config is used only once, so it is moved instead of cloned.
 								        let mut builder =
 								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
 								        builder.add_documents(documents).unwrap();
 								        builder.execute().unwrap();
 								        wtxn.commit().unwrap();
 								        // A tiny radius around (31, 42) must return all four documents.
 								        let rtxn = index.read_txn().unwrap();
 								        let mut search = crate::Search::new(&rtxn, &index);
 								        search.filter(crate::Filter::from_str("_geoRadius(31, 42, 0.000001)").unwrap().unwrap());
 								        let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
 								        assert_eq!(documents_ids, vec![0, 1, 2, 3]);
 								    }
 								    #[test]
 								    fn geo_error() {
 								        // Checks the error message returned for each kind of malformed `_geo` field.
 								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        let config = IndexerConfig::default();
 								        // Declare `_geo` as filterable before any document is sent.
 								        let mut wtxn = index.write_txn().unwrap();
 								        let mut builder = update::Settings::new(&mut wtxn, &index, &config);
 								        builder.set_filterable_fields(hashset!(S("_geo")));
 								        builder.execute(|_| ()).unwrap();
 								        wtxn.commit().unwrap();
 								        let indexing_config = IndexDocumentsConfig {
 								            update_method: IndexDocumentsMethod::ReplaceDocuments,
 								            ..Default::default()
 								        };
 								        // Every attempt below runs inside this single write transaction, which is
 								        // never committed.
 								        let mut wtxn = index.write_txn().unwrap();
 								        // Each case pairs a batch holding one malformed document with the exact
 								        // error message the indexing is expected to fail with.
 								        let cases = vec![
 								            (
 								                documents!([
 								                  { "id": 0, "_geo": { "lng": 42 } }
 								                ]),
 								                r#"Could not find latitude in the document with the id: `0`. Was expecting a `_geo.lat` field."#,
 								            ),
 								            (
 								                documents!([
 								                  { "id": 0, "_geo": { "lat": 42 } }
 								                ]),
 								                r#"Could not find longitude in the document with the id: `0`. Was expecting a `_geo.lng` field."#,
 								            ),
 								            (
 								                documents!([
 								                  { "id": 0, "_geo": { "lat": "lol", "lng": 42 } }
 								                ]),
 								                r#"Could not parse latitude in the document with the id: `0`. Was expecting a number but instead got `"lol"`."#,
 								            ),
 								            (
 								                documents!([
 								                  { "id": 0, "_geo": { "lat": [12, 13], "lng": 42 } }
 								                ]),
 								                r#"Could not parse latitude in the document with the id: `0`. Was expecting a number but instead got `[12,13]`."#,
 								            ),
 								            (
 								                documents!([
 								                  { "id": 0, "_geo": { "lat": 12, "lng": "hello" } }
 								                ]),
 								                r#"Could not parse longitude in the document with the id: `0`. Was expecting a number but instead got `"hello"`."#,
 								            ),
 								        ];
 								        for (documents, expected_message) in cases {
 								            // A fresh builder per case: adding the batch succeeds, the failure only
 								            // shows up when the indexing is executed.
 								            let mut builder =
 								                IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
 								                    .unwrap();
 								            builder.add_documents(documents).unwrap();
 								            let error = builder.execute().unwrap_err();
 								            assert_eq!(&error.to_string(), expected_message);
 								        }
 								    }
-												Add a test for when we insert a previously deleted document

											
										
										
											2021-06-30 11:23:29 +02:00
+								    #[test]
 								    fn delete_documents_then_insert() {
 								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        let mut wtxn = index.write_txn().unwrap();
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([
-												Add a test for when we insert a previously deleted document

											
										
										
											2021-06-30 11:23:29 +02:00
+								            { "objectId": 123, "title": "Pride and Prejudice", "comment": "A great book" },
 								            { "objectId": 456, "title": "Le Petit Prince",     "comment": "A french book" },
 								            { "objectId": 1,   "title": "Alice In Wonderland", "comment": "A weird book" },
-												add a tests for the indexation of the geosearch

											
										
										
											2021-08-26 13:27:32 +02:00
+								            { "objectId": 30,  "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        ]);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
 								        let mut builder =
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
 								                .unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Add a test for when we insert a previously deleted document

											
										
										
											2021-06-30 11:23:29 +02:00
 								        assert_eq!(index.primary_key(&wtxn).unwrap(), Some("objectId"));
 								        // Delete not all of the documents but some of them.
-												remove update_id in UpdateBuilder

											
										
										
											2021-11-03 13:12:01 +01:00
+								        let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-												Add a test for when we insert a previously deleted document

											
										
										
											2021-06-30 11:23:29 +02:00
+								        builder.delete_external_id("30");
 								        builder.execute().unwrap();
 								        let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
 								        assert!(external_documents_ids.get("30").is_none());
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([
-												add a tests for the indexation of the geosearch

											
										
										
											2021-08-26 13:27:32 +02:00
+								            { "objectId": 30,  "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        ]);
-												Add a test for when we insert a previously deleted document

											
										
										
											2021-06-30 11:23:29 +02:00
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let mut builder =
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
 								                .unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Add a test for when we insert a previously deleted document

											
										
										
											2021-06-30 11:23:29 +02:00
+								        let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
 								        assert!(external_documents_ids.get("30").is_some());
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([
-												add a tests for the indexation of the geosearch

											
										
										
											2021-08-26 13:27:32 +02:00
+								            { "objectId": 30,  "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        ]);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
 								        let mut builder =
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
 								                .unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Add a test for when we insert a previously deleted document

											
										
										
											2021-06-30 11:23:29 +02:00
 								        wtxn.commit().unwrap();
 								    }
-												Add a test to check that we can index more than 256 fields

											
										
										
											2021-07-06 11:40:45 +02:00
 								    /// Indexes a single document made of an `id` field plus 1000 generated
 								    /// fields and expects building, indexing and committing to all succeed.
 								    #[test]
 								    fn index_more_than_256_fields() {
 								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        let mut wtxn = index.write_txn().unwrap();
 								        // One document with far more than 256 distinct field names.
 								        let mut big_object = HashMap::new();
 								        big_object.insert(S("id"), "wow");
 								        big_object.extend((0..1000).map(|i| (i.to_string(), "I am a text!")));
 								        // Serialize the object to JSON and feed it to a `DocumentBatchBuilder`
 								        // that writes into an in-memory cursor.
 								        let mut cursor = Cursor::new(Vec::new());
 								        let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
 								        let big_object = Cursor::new(serde_json::to_vec(&big_object).unwrap());
 								        builder.extend_from_json(big_object).unwrap();
 								        builder.finish().unwrap();
 								        // Rewind so the reader starts at the beginning of what was written.
 								        cursor.set_position(0);
 								        let content = DocumentBatchReader::from_reader(cursor).unwrap();
 								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
 								        let mut builder =
 								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
 								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
 								        wtxn.commit().unwrap();
 								    }
-												Add a test that triggers a panic when indexing zeroes

											
										
										
											2021-07-22 17:14:44 +02:00
-												Remove limit of 1000 position per attribute

Instead of using an arbitrary limit we encode the absolute position in a u32
using one strong u16 for the field id and a weak u16 for the relative position in the attribute.

											
										
										
											2021-09-22 17:48:24 +02:00
+								    #[test]
 								    fn index_more_than_1000_positions_in_a_field() {
 								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(50 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        let mut wtxn = index.write_txn().unwrap();
 								        let mut big_object = HashMap::new();
 								        big_object.insert(S("id"), "wow");
-												Use smartstring to store the external id in our hashmap

We need to store all the external id (primary key) in a hashmap
associated to their internal id during.
The smartstring remove heap allocation / memory usage and should
improve the cache locality.

											
										
										
											2022-04-11 15:43:18 +02:00
+								        let content: String = (0..=u16::MAX)
 								            .into_iter()
 								            .map(|p| p.to_string())
 								            .reduce(|a, b| a + " " + b.as_ref())
 								            .unwrap();
-												Remove limit of 1000 position per attribute

Instead of using an arbitrary limit we encode the absolute position in a u32
using one strong u16 for the field id and a weak u16 for the relative position in the attribute.

											
										
										
											2021-09-22 17:48:24 +02:00
+								        big_object.insert("content".to_string(), &content);
 								        let mut cursor = Cursor::new(Vec::new());
-												implement review suggestions

											
										
										
											2021-10-25 17:38:32 +02:00
+								        let big_object = serde_json::to_string(&big_object).unwrap();
-												Remove limit of 1000 position per attribute

Instead of using an arbitrary limit we encode the absolute position in a u32
using one strong u16 for the field id and a weak u16 for the relative position in the attribute.

											
										
										
											2021-09-22 17:48:24 +02:00
+								        let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
-												implement review suggestions

											
										
										
											2021-10-25 17:38:32 +02:00
+								        builder.extend_from_json(&mut big_object.as_bytes()).unwrap();
-												Remove limit of 1000 position per attribute

Instead of using an arbitrary limit we encode the absolute position in a u32
using one strong u16 for the field id and a weak u16 for the relative position in the attribute.

											
										
										
											2021-09-22 17:48:24 +02:00
+								        builder.finish().unwrap();
 								        cursor.set_position(0);
 								        let content = DocumentBatchReader::from_reader(cursor).unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
 								        let mut builder =
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
 								                .unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Remove limit of 1000 position per attribute

Instead of using an arbitrary limit we encode the absolute position in a u32
using one strong u16 for the field id and a weak u16 for the relative position in the attribute.

											
										
										
											2021-09-22 17:48:24 +02:00
 								        wtxn.commit().unwrap();
 								        let mut rtxn = index.read_txn().unwrap();
 								        assert!(index.word_docids.get(&mut rtxn, "0").unwrap().is_some());
 								        assert!(index.word_docids.get(&mut rtxn, "64").unwrap().is_some());
 								        assert!(index.word_docids.get(&mut rtxn, "256").unwrap().is_some());
 								        assert!(index.word_docids.get(&mut rtxn, "1024").unwrap().is_some());
 								        assert!(index.word_docids.get(&mut rtxn, "32768").unwrap().is_some());
 								        assert!(index.word_docids.get(&mut rtxn, "65535").unwrap().is_some());
 								    }
-												Add a test that triggers a panic when indexing zeroes

											
										
										
											2021-07-22 17:14:44 +02:00
+								    #[test]
 								    fn index_documents_with_zeroes() {
 								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        let mut wtxn = index.write_txn().unwrap();
-												Implement documents format

document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits

											
										
										
											2021-08-31 11:44:15 +02:00
+								        let content = documents!([
 								            {
 								                "id": 2,
 								                "title": "Prideand Prejudice",
 								                "au{hor": "Jane Austin",
 								                "genre": "romance",
 								                "price$": "3.5$",
 								            },
 								            {
 								                "id": 456,
 								                "title": "Le Petit Prince",
 								                "au{hor": "Antoine de Saint-Exupéry",
 								                "genre": "adventure",
 								                "price$": "10.0$",
 								            },
 								            {
 								                "id": 1,
 								                "title": "Wonderland",
 								                "au{hor": "Lewis Carroll",
 								                "genre": "fantasy",
 								                "price$": "25.99$",
 								            },
 								            {
 								                "id": 4,
 								                "title": "Harry Potter ing fantasy\0lood Prince",
 								                "au{hor": "J. K. Rowling",
 								                "genre": "fantasy\0",
 								            },
 								        ]);
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
 								        let mut builder =
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
 								                .unwrap();
 								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
 								        wtxn.commit().unwrap();
 								    }
 								    /// Indexes documents where `nested` is, in turn, absent, an object, an
 								    /// array mixing strings and objects, and a plain string. After declaring
 								    /// `title`, `nested.object` and `nested.machin` as searchable and
 								    /// filterable, checks which queries and filters match and that filtering
 								    /// on `nested` itself is rejected with `InvalidFilter`.
 								    #[test]
 								    fn index_documents_with_nested_fields() {
 								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        let mut wtxn = index.write_txn().unwrap();
 								        let content = documents!([
 								            {
 								                "id": 0,
 								                "title": "The zeroth document",
 								            },
 								            {
 								                "id": 1,
 								                "title": "The first document",
 								                "nested": {
 								                    "object": "field",
 								                    "machin": "bidule",
 								                },
 								            },
 								            {
 								                "id": 2,
 								                "title": "The second document",
 								                "nested": [
 								                    "array",
 								                    {
 								                        "object": "field",
 								                    },
 								                    {
 								                        "prout": "truc",
 								                        "machin": "lol",
 								                    },
 								                ],
 								            },
 								            {
 								                "id": 3,
 								                "title": "The third document",
 								                "nested": "I lied",
 								            },
 								        ]);
 								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
 								        let mut builder =
 								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
 								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
 								        wtxn.commit().unwrap();
 								        // declare the searchable and filterable fields in a second transaction
 								        let mut wtxn = index.write_txn().unwrap();
 								        let mut builder = update::Settings::new(&mut wtxn, &index, &config);
 								        let searchable_fields = vec![S("title"), S("nested.object"), S("nested.machin")];
 								        builder.set_searchable_fields(searchable_fields);
 								        let faceted_fields = hashset!(S("title"), S("nested.object"), S("nested.machin"));
 								        builder.set_filterable_fields(faceted_fields);
 								        builder.execute(|_| ()).unwrap();
 								        wtxn.commit().unwrap();
 								        let rtxn = index.read_txn().unwrap();
 								        let facets = index.faceted_fields(&rtxn).unwrap();
 								        assert_eq!(facets, hashset!(S("title"), S("nested.object"), S("nested.machin")));
 								        // testing the simple query search
 								        let mut search = crate::Search::new(&rtxn, &index);
 								        search.query("document");
 								        search.authorize_typos(true);
 								        search.optional_words(true);
 								        // all documents should be returned
 								        let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
 								        assert_eq!(documents_ids.len(), 4);
 								        // each query is run on the same `search`, in this order
 								        for (query, expected) in [
 								            ("zeroth", vec![0]),
 								            ("first", vec![1]),
 								            ("second", vec![2]),
 								            ("third", vec![3]),
 								            ("field", vec![1, 2]),
 								            ("lol", vec![2]),
 								            ("object", vec![]),
 								            ("array", vec![]), // nested is not searchable
 								            ("lied", vec![]),  // nested is not searchable
 								        ] {
 								            search.query(query);
 								            let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
 								            assert_eq!(documents_ids, expected, "query: {:?}", query);
 								        }
 								        // testing the filters
 								        let mut search = crate::Search::new(&rtxn, &index);
 								        for (filter, expected) in [
 								            (r#"title = "The first document""#, vec![1]),
 								            (r#"nested.object = field"#, vec![1, 2]),
 								            (r#"nested.machin = bidule"#, vec![1]),
 								        ] {
 								            search.filter(crate::Filter::from_str(filter).unwrap().unwrap());
 								            let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
 								            assert_eq!(documents_ids, expected, "filter: {:?}", filter);
 								        }
 								        // nested is not filterable: executing the search must fail
 								        for filter in [r#"nested = array"#, r#"nested = "I lied""#] {
 								            search.filter(crate::Filter::from_str(filter).unwrap().unwrap());
 								            let error = search.execute().map(|_| unreachable!()).unwrap_err();
 								            assert!(matches!(error, crate::Error::UserError(crate::UserError::InvalidFilter(_))));
 								        }
 								    }
 								    /// Sets the primary key to `complex.nested.id`, then indexes four
 								    /// documents that each spell that path differently (fully nested objects,
 								    /// a dotted key holding an object, an object holding a dotted key, and a
 								    /// single dotted key). Checks that every document is indexed and gets the
 								    /// expected internal id.
 								    #[test]
 								    fn index_documents_with_nested_primary_key() {
 								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        let config = IndexerConfig::default();
 								        let mut wtxn = index.write_txn().unwrap();
 								        let mut builder = update::Settings::new(&mut wtxn, &index, &config);
 								        builder.set_primary_key("complex.nested.id".to_owned());
 								        builder.execute(|_| ()).unwrap();
 								        wtxn.commit().unwrap();
 								        let mut wtxn = index.write_txn().unwrap();
 								        let content = documents!([
 								            {
 								                "complex": {
 								                    "nested": {
 								                        "id": 0,
 								                    },
 								                },
 								                "title": "The zeroth document",
 								            },
 								            {
 								                "complex.nested": {
 								                    "id": 1,
 								                },
 								                "title": "The first document",
 								            },
 								            {
 								                "complex": {
 								                    "nested.id": 2,
 								                },
 								                "title": "The second document",
 								            },
 								            {
 								                "complex.nested.id": 3,
 								                "title": "The third document",
 								            },
 								        ]);
 								        let indexing_config = IndexDocumentsConfig::default();
 								        let mut builder =
 								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
 								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
 								        wtxn.commit().unwrap();
 								        let rtxn = index.read_txn().unwrap();
 								        // testing the simple query search
 								        let mut search = crate::Search::new(&rtxn, &index);
 								        search.query("document");
 								        search.authorize_typos(true);
 								        search.optional_words(true);
 								        // all documents should be returned
 								        let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
 								        assert_eq!(documents_ids.len(), 4);
 								        // each title word must match exactly one document
 								        for (query, expected) in [
 								            ("zeroth", vec![0]),
 								            ("first", vec![1]),
 								            ("second", vec![2]),
 								            ("third", vec![3]),
 								        ] {
 								            search.query(query);
 								            let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
 								            assert_eq!(documents_ids, expected, "query: {:?}", query);
 								        }
 								    }
 								    /// Indexes four documents that all spell the path
 								    /// `dog.race.bernese mountain` differently, then checks the set returned
 								    /// by `faceted_fields` in three situations: with `dog` filterable, after
 								    /// resetting the filterable fields, and with `dog.race` sortable. Also
 								    /// checks filtering and ascending sort on the leaf field.
 								    #[test]
 								    fn test_facets_generation() {
 								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        let mut wtxn = index.write_txn().unwrap();
 								        let content = documents!([
 								            {
 								                "id": 0,
 								                "dog": {
 								                    "race": {
 								                        "bernese mountain": "zeroth",
 								                    },
 								                },
 								            },
 								            {
 								                "id": 1,
 								                "dog.race": {
 								                    "bernese mountain": "first",
 								                },
 								            },
 								            {
 								                "id": 2,
 								                "dog.race.bernese mountain": "second",
 								            },
 								            {
 								                "id": 3,
 								                "dog": {
 								                    "race.bernese mountain": "third"
 								                },
 								            },
 								        ]);
 								        // index the documents
 								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
 								        let mut builder =
 								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
 								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
 								        wtxn.commit().unwrap();
 								        // ---- ADD THE SETTING TO TEST THE FILTERABLE
 								        // add the settings
 								        let mut wtxn = index.write_txn().unwrap();
 								        let mut builder = update::Settings::new(&mut wtxn, &index, &config);
 								        builder.set_filterable_fields(hashset!(S("dog")));
 								        builder.execute(|_| ()).unwrap();
 								        wtxn.commit().unwrap();
 								        let rtxn = index.read_txn().unwrap();
 								        // only `dog` was declared, yet its nested paths are reported too
 								        let facets = index.faceted_fields(&rtxn).unwrap();
 								        assert_eq!(facets, hashset!(S("dog"), S("dog.race"), S("dog.race.bernese mountain")));
 								        for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] {
 								            let mut search = crate::Search::new(&rtxn, &index);
 								            let filter = format!(r#""dog.race.bernese mountain" = {s}"#);
 								            search.filter(crate::Filter::from_str(&filter).unwrap().unwrap());
 								            let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
 								            assert_eq!(documents_ids, vec![i]);
 								        }
 								        // ---- RESET THE SETTINGS
 								        // update the settings
 								        let mut wtxn = index.write_txn().unwrap();
 								        let mut builder = update::Settings::new(&mut wtxn, &index, &config);
 								        builder.reset_filterable_fields();
 								        builder.execute(|_| ()).unwrap();
 								        wtxn.commit().unwrap();
 								        let rtxn = index.read_txn().unwrap();
 								        let facets = index.faceted_fields(&rtxn).unwrap();
 								        assert_eq!(facets, hashset!());
 								        // ---- UPDATE THE SETTINGS TO TEST THE SORTABLE
 								        // update the settings
 								        let mut wtxn = index.write_txn().unwrap();
 								        let mut builder = update::Settings::new(&mut wtxn, &index, &config);
 								        builder.set_sortable_fields(hashset!(S("dog.race")));
 								        builder.execute(|_| ()).unwrap();
 								        wtxn.commit().unwrap();
 								        let rtxn = index.read_txn().unwrap();
 								        let facets = index.faceted_fields(&rtxn).unwrap();
 								        assert_eq!(facets, hashset!(S("dog.race"), S("dog.race.bernese mountain")));
 								        let mut search = crate::Search::new(&rtxn, &index);
 								        search.sort_criteria(vec![crate::AscDesc::Asc(crate::Member::Field(S(
 								            "dog.race.bernese mountain",
 								        )))]);
 								        let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
 								        assert_eq!(documents_ids, vec![1, 2, 3, 0]);
 								    }
-												Add test checking the bug reported in meilisearch issue 1716

											
										
										
											2021-09-23 15:55:39 +02:00
 								    #[test]
 								    fn index_2_times_documents_split_by_zero_document_indexation() {
 								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        let content = documents!([
 								            {"id": 0, "name": "Kerollmops", "score": 78},
 								            {"id": 1, "name": "ManyTheFish", "score": 75},
 								            {"id": 2, "name": "Ferdi", "score": 39},
 								            {"id": 3, "name": "Tommy", "score": 33}
 								        ]);
 								        let mut wtxn = index.write_txn().unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
 								        let mut builder =
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
 								                .unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Add test checking the bug reported in meilisearch issue 1716

											
										
										
											2021-09-23 15:55:39 +02:00
+								        wtxn.commit().unwrap();
 								        // Check that there is 4 document now.
 								        let rtxn = index.read_txn().unwrap();
 								        let count = index.number_of_documents(&rtxn).unwrap();
 								        assert_eq!(count, 4);
 								        let content = documents!([]);
 								        let mut wtxn = index.write_txn().unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
 								        let mut builder =
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
 								                .unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Add test checking the bug reported in meilisearch issue 1716

											
										
										
											2021-09-23 15:55:39 +02:00
+								        wtxn.commit().unwrap();
 								        // Check that there is 4 document now.
 								        let rtxn = index.read_txn().unwrap();
 								        let count = index.number_of_documents(&rtxn).unwrap();
 								        assert_eq!(count, 4);
 								        let content = documents!([
 								            {"id": 0, "name": "Kerollmops", "score": 78},
 								            {"id": 1, "name": "ManyTheFish", "score": 75},
 								            {"id": 2, "name": "Ferdi", "score": 39},
 								            {"id": 3, "name": "Tommy", "score": 33}
 								        ]);
 								        let mut wtxn = index.write_txn().unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
 								        let mut builder =
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
 								                .unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Add test checking the bug reported in meilisearch issue 1716

											
										
										
											2021-09-23 15:55:39 +02:00
+								        wtxn.commit().unwrap();
 								        // Check that there is 4 document now.
 								        let rtxn = index.read_txn().unwrap();
 								        let count = index.number_of_documents(&rtxn).unwrap();
 								        assert_eq!(count, 4);
 								    }
-												Add failing test related to Meilisearch#1714

											
										
										
											2021-09-28 12:05:11 +02:00
 								    /// Test tied to Meilisearch issue 1714: indexes two documents with
 								    /// Chinese titles, then checks the `word_docids` entries for "化妆包" and
 								    /// "包" as well as the result of a search for "化妆包".
 								    #[test]
 								    fn test_meilisearch_1714() {
 								        let path = tempfile::tempdir().unwrap();
 								        let mut options = EnvOpenOptions::new();
 								        options.map_size(10 * 1024 * 1024); // 10 MB
 								        let index = Index::new(options, &path).unwrap();
 								        // Two documents whose titles both contain the character "包".
 								        let content = documents!([
 								          {"id": "123", "title": "小化妆包" },
 								          {"id": "456", "title": "Ipad 包" }
 								        ]);
 								        let mut wtxn = index.write_txn().unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        let config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
 								        let mut builder =
-												nested fields

											
										
										
											2022-03-23 17:28:41 +01:00
+								            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
 								                .unwrap();
-												document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt

											
										
										
											2021-12-08 14:12:07 +01:00
+								        builder.add_documents(content).unwrap();
 								        builder.execute().unwrap();
-												Add failing test related to Meilisearch#1714

											
										
										
											2021-09-28 12:05:11 +02:00
+								        wtxn.commit().unwrap();
 								        let rtxn = index.read_txn().unwrap();
 								        // Only the first document should match.
 								        let count = index.word_docids.get(&rtxn, "化妆包").unwrap().unwrap().len();
 								        assert_eq!(count, 1);
 								        // Only the second document should match.
 								        let count = index.word_docids.get(&rtxn, "包").unwrap().unwrap().len();
 								        assert_eq!(count, 1);
 								        // Search for "化妆包" with typos and optional words both enabled.
 								        let mut search = crate::Search::new(&rtxn, &index);
 								        search.query("化妆包");
 								        search.authorize_typos(true);
 								        search.optional_words(true);
 								        // only 1 document should be returned
 								        let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
 								        assert_eq!(documents_ids.len(), 1);
 								    }
-												Add a test to make sure that long words are handled

											
										
										
											2022-04-21 13:45:28 +02:00
 								    /// Documents whose text contains very long words must be indexed
 								    /// without any step returning an error.
 								    #[test]
 								    fn text_with_too_long_words() {
 								        // Temporary index backed by a 10 MB environment.
 								        let tmp_dir = tempfile::tempdir().unwrap();
 								        let mut env_options = EnvOpenOptions::new();
 								        env_options.map_size(10 * 1024 * 1024);
 								        let index = Index::new(env_options, &tmp_dir).unwrap();
 								        // Titles made of 256- and 512-character words, plus a pair of 250-character words.
 								        let long_word_docs = documents!([
 								          {"id": 1, "title": "a".repeat(256) },
 								          {"id": 2, "title": "b".repeat(512) },
 								          {"id": 3, "title": format!("{} {}", "c".repeat(250), "d".repeat(250)) },
 								        ]);
 								        let mut wtxn = index.write_txn().unwrap();
 								        let indexer_config = IndexerConfig::default();
 								        let mut indexer = IndexDocuments::new(
 								            &mut wtxn,
 								            &index,
 								            &indexer_config,
 								            IndexDocumentsConfig::default(),
 								            |_| (),
 								        )
 								        .unwrap();
 								        // Every step is unwrapped: any indexing error fails the test.
 								        indexer.add_documents(long_word_docs).unwrap();
 								        indexer.execute().unwrap();
 								        wtxn.commit().unwrap();
 								    }
-												Add a test for long keys in LMDB

											
										
										
											2022-05-03 10:03:13 +02:00
 								    /// Indexes 200 documents sharing one very long text value, then one more
 								    /// document with the same text, all inside a single write transaction.
 								    /// Every step is unwrapped, so any indexing error fails the test.
 								    #[test]
 								    fn text_with_too_long_keys() {
 								        // Temporary index backed by a 10 MB environment.
 								        let tmp_dir = tempfile::tempdir().unwrap();
 								        let mut env_options = EnvOpenOptions::new();
 								        env_options.map_size(10 * 1024 * 1024);
 								        let index = Index::new(env_options, &tmp_dir).unwrap();
 								        let script = "https://bug.example.com/meilisearch/milli.saml2?ROLE=Programmer-1337&SAMLRequest=Cy1ytcZT1Po%2L2IY2y9Unru8rgnW4qWfPiI0EpT7P8xjJV8PeQikRL%2E8D9A4pj9tmbymbQCQwGmGjPMK7qwXFPX4DH52JO2b7n6TXjuR7zkIFuYdzdY2rwRNBPgCL7ihclEm9zyIjKZQ%2JTqiwfXxWjnI0KEYQYHdwd6Q%2Fx%28BDLNsvmL54CCY2F4RWeRs4eqWfn%2EHqxlhreFzax4AiQ2tgOtV5thOaaWqrhZD%2Py70nuyZWNTKwciGI43AoHg6PThANsQ5rAY5amzN%2ufbs1swETUXlLZuOut5YGpYPZfY6STJWNp4QYSUOUXBZpdElYsH7UHZ7VhJycgyt%28aTK0GW6GbKne2tJM0hgSczOqndg6RFa9WsnSBi4zMcaEfYur4WlSsHDYInF9ROousKqVMZ6H8%2gbUissaLh1eXRGo8KEJbyEHbhVVKGD%28kx4cfKjx9fT3pkeDTdvDrVn25jIzi9wHyt9l1lWc8ICnCvXCVUPP%2BjBG4wILR29gMV9Ux2QOieQm2%2Fycybhr8sBGCl30mHC7blvWt%2T3mrCHQoS3VK49PZNPqBZO9C7vOjOWoszNkJx4QckWV%2FZFvbpzUUkiBiehr9F%2FvQSxz9lzv68GwbTu9fr638p%2FQM%3D&RelayState=https%3A%2F%example.bug.com%2Fde&SigAlg=http%3A%2F%2Fwww.w3.org%2F2000%2F09%2Fxmldsig%23rsa-sha1&Signature=AZFpkhFFII7PodiewTovaGnLQKUVZp0qOCCcBIUkJ6P5by3lE3Lldj9pKaFu4wz4j%2B015HEhDvF0LlAmwwES85vdGh%2FpD%2cIQPRUEjdCbQkQDd3dy1mMXbpXxSe4QYcv9Ni7tqNTQxekpO1gE7rtg6zC66EU55uM9aj9abGQ034Vly%2F6IJ08bvAq%2B%2FB9KruLstuiNWnlXTfNGsOxGLK7%2BXr94LTkat8m%2FMan6Qr95%2KeR5TmmqaQIE4N9H6o4TopT7mXr5CF2Z3";
 								        // Build a batch of 200 documents that all carry the long text.
 								        let many_docs = {
 								            let json_docs: Vec<_> =
 								                (0..200i32).map(|i| serde_json::json!({ "id": i, "script": script })).collect();
 								            let payload = serde_json::to_vec(&json_docs).unwrap();
 								            let mut cursor = std::io::Cursor::new(Vec::new());
 								            let mut batch_builder =
 								                crate::documents::DocumentBatchBuilder::new(&mut cursor).unwrap();
 								            batch_builder.extend_from_json(std::io::Cursor::new(payload)).unwrap();
 								            batch_builder.finish().unwrap();
 								            // Rewind so the reader starts from the beginning of the batch.
 								            cursor.set_position(0);
 								            crate::documents::DocumentBatchReader::from_reader(cursor).unwrap()
 								        };
 								        // First pass: index the 200 long documents.
 								        let mut wtxn = index.write_txn().unwrap();
 								        let indexer_config = IndexerConfig::default();
 								        let mut first_pass = IndexDocuments::new(
 								            &mut wtxn,
 								            &index,
 								            &indexer_config,
 								            IndexDocumentsConfig::default(),
 								            |_| (),
 								        )
 								        .unwrap();
 								        first_pass.add_documents(many_docs).unwrap();
 								        first_pass.execute().unwrap();
 								        // Second pass, same transaction: index one more long document.
 								        let single_doc = documents!([
 								          {"id": 400, "script": script },
 								        ]);
 								        let mut second_pass = IndexDocuments::new(
 								            &mut wtxn,
 								            &index,
 								            &indexer_config,
 								            IndexDocumentsConfig::default(),
 								            |_| (),
 								        )
 								        .unwrap();
 								        second_pass.add_documents(single_doc).unwrap();
 								        second_pass.execute().unwrap();
 								        wtxn.commit().unwrap();
 								    }
-												add failing test

											
										
										
											2022-06-07 12:24:06 +02:00
 								    /// Feeds two single-document batches to the same `IndexDocuments` builder
 								    /// and checks that the values of the external documents ids map are all
 								    /// distinct.
 								    #[test]
 								    fn index_documents_in_multiple_transforms() {
 								        let tmp = tempfile::tempdir().unwrap();
 								        let mut env_options = EnvOpenOptions::new();
 								        env_options.map_size(4096 * 100);
 								        let index = Index::new(env_options, tmp).unwrap();
 								        // The two documents differ only by `id`, `title` and `public_uid`.
 								        let first_doc = documents!([{
 								            "id": 228142,
 								            "title": "asdsad",
 								            "state": "automated",
 								            "priority": "normal",
 								            "public_uid": "37ccf021",
 								            "project_id": 78207,
 								            "branch_id_number": 0
 								        }]);
 								        let second_doc = documents!([{
 								            "id": 228143,
 								            "title": "something",
 								            "state": "automated",
 								            "priority": "normal",
 								            "public_uid": "39c6499b",
 								            "project_id": 78207,
 								            "branch_id_number": 0
 								        }]);
 								        let mut wtxn = index.write_txn().unwrap();
 								        let indexer_config = IndexerConfig::default();
 								        let indexing_config = IndexDocumentsConfig::default();
 								        let mut indexer =
 								            IndexDocuments::new(&mut wtxn, &index, &indexer_config, indexing_config, |_| ())
 								                .unwrap();
 								        // Two separate `add_documents` calls before a single `execute`.
 								        indexer.add_documents(first_doc).unwrap();
 								        indexer.add_documents(second_doc).unwrap();
 								        indexer.execute().unwrap();
 								        // Read back through the still-open write transaction.
 								        let external_ids = index.external_documents_ids(&wtxn).unwrap().to_hash_map();
 								        let distinct_values: HashSet<_> = external_ids.values().collect();
 								        assert_eq!(distinct_values.len(), external_ids.len());
 								    }
-												Add a database containing the docids where each field exists

											
										
										
											2022-07-19 09:30:19 +02:00
 								    /// Indexes the documents first, then declares `colour` as filterable, and
 								    /// checks the `facet_id_exists_docids` entries for `colour` and
 								    /// `colour.green`.
 								    #[test]
 								    fn index_documents_check_exists_database_reindex() {
 								        // Temporary index backed by a 10 MB environment.
 								        let tmp_dir = tempfile::tempdir().unwrap();
 								        let mut env_options = EnvOpenOptions::new();
 								        env_options.map_size(10 * 1024 * 1024);
 								        let index = Index::new(env_options, &tmp_dir).unwrap();
 								        let indexer_config = IndexerConfig::default();
 								        // `colour` is a number, an empty array, an empty object, null,
 								        // a non-empty array, absent (id 5), and a nested object (id 6).
 								        let colour_docs = documents!([
 								            {
 								                "id": 0,
 								                "colour": 0,
 								            },
 								            {
 								                "id": 1,
 								                "colour": []
 								            },
 								            {
 								                "id": 2,
 								                "colour": {}
 								            },
 								            {
 								                "id": 3,
 								                "colour": null
 								            },
 								            {
 								                "id": 4,
 								                "colour": [1]
 								            },
 								            {
 								                "id": 5
 								            },
 								            {
 								                "id": 6,
 								                "colour": {
 								                    "green": 1
 								                }
 								            }
 								        ]);
 								        // Step 1: index the documents and commit.
 								        let mut wtxn = index.write_txn().unwrap();
 								        let mut indexer = IndexDocuments::new(
 								            &mut wtxn,
 								            &index,
 								            &indexer_config,
 								            IndexDocumentsConfig::default(),
 								            |_| (),
 								        )
 								        .unwrap();
 								        indexer.add_documents(colour_docs).unwrap();
 								        indexer.execute().unwrap();
 								        wtxn.commit().unwrap();
 								        // Step 2: make `colour` filterable in a second transaction.
 								        let mut wtxn = index.write_txn().unwrap();
 								        let mut settings = update::Settings::new(&mut wtxn, &index, &indexer_config);
 								        settings.set_filterable_fields(hashset!(S("colour")));
 								        settings.execute(|_| ()).unwrap();
 								        wtxn.commit().unwrap();
 								        // Step 3: inspect the faceted fields and the "exists" bitmaps.
 								        let rtxn = index.read_txn().unwrap();
 								        assert_eq!(
 								            index.faceted_fields(&rtxn).unwrap(),
 								            hashset!(S("colour"), S("colour.green"))
 								        );
 								        let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
 								        let colour_id = fields_ids_map.id("colour").unwrap();
 								        let colour_green_id = fields_ids_map.id("colour.green").unwrap();
 								        let docids_with_colour: Vec<_> = index
 								            .facet_id_exists_docids
 								            .get(&rtxn, &colour_id)
 								            .unwrap()
 								            .unwrap()
 								            .into_iter()
 								            .collect();
 								        assert_eq!(docids_with_colour, vec![0, 1, 2, 3, 4, 6]);
 								        let docids_with_colour_green: Vec<_> = index
 								            .facet_id_exists_docids
 								            .get(&rtxn, &colour_green_id)
 								            .unwrap()
 								            .unwrap()
 								            .into_iter()
 								            .collect();
 								        assert_eq!(docids_with_colour_green, vec![6]);
 								    }
 								    /// Declares `colour` as filterable first, then indexes the documents, and
 								    /// checks the `facet_id_exists_docids` entries for `colour` and
 								    /// `colour.green`.
 								    #[test]
 								    fn index_documents_check_exists_database() {
 								        // Temporary index backed by a 10 MB environment.
 								        let tmp_dir = tempfile::tempdir().unwrap();
 								        let mut env_options = EnvOpenOptions::new();
 								        env_options.map_size(10 * 1024 * 1024);
 								        let index = Index::new(env_options, &tmp_dir).unwrap();
 								        let indexer_config = IndexerConfig::default();
 								        // Step 1: make `colour` filterable before any document exists.
 								        let mut wtxn = index.write_txn().unwrap();
 								        let mut settings = update::Settings::new(&mut wtxn, &index, &indexer_config);
 								        settings.set_filterable_fields(hashset!(S("colour")));
 								        settings.execute(|_| ()).unwrap();
 								        wtxn.commit().unwrap();
 								        // `colour` is a number, an empty array, an empty object, null,
 								        // a non-empty array, absent (id 5), and a nested object (id 6).
 								        let colour_docs = documents!([
 								            {
 								                "id": 0,
 								                "colour": 0,
 								            },
 								            {
 								                "id": 1,
 								                "colour": []
 								            },
 								            {
 								                "id": 2,
 								                "colour": {}
 								            },
 								            {
 								                "id": 3,
 								                "colour": null
 								            },
 								            {
 								                "id": 4,
 								                "colour": [1]
 								            },
 								            {
 								                "id": 5
 								            },
 								            {
 								                "id": 6,
 								                "colour": {
 								                    "green": 1
 								                }
 								            }
 								        ]);
 								        // Step 2: index the documents in a second transaction.
 								        let mut wtxn = index.write_txn().unwrap();
 								        let mut indexer = IndexDocuments::new(
 								            &mut wtxn,
 								            &index,
 								            &indexer_config,
 								            IndexDocumentsConfig::default(),
 								            |_| (),
 								        )
 								        .unwrap();
 								        indexer.add_documents(colour_docs).unwrap();
 								        indexer.execute().unwrap();
 								        wtxn.commit().unwrap();
 								        // Step 3: inspect the faceted fields and the "exists" bitmaps.
 								        let rtxn = index.read_txn().unwrap();
 								        assert_eq!(
 								            index.faceted_fields(&rtxn).unwrap(),
 								            hashset!(S("colour"), S("colour.green"))
 								        );
 								        let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
 								        let colour_id = fields_ids_map.id("colour").unwrap();
 								        let colour_green_id = fields_ids_map.id("colour.green").unwrap();
 								        let docids_with_colour: Vec<_> = index
 								            .facet_id_exists_docids
 								            .get(&rtxn, &colour_id)
 								            .unwrap()
 								            .unwrap()
 								            .into_iter()
 								            .collect();
 								        assert_eq!(docids_with_colour, vec![0, 1, 2, 3, 4, 6]);
 								        let docids_with_colour_green: Vec<_> = index
 								            .facet_id_exists_docids
 								            .get(&rtxn, &colour_green_id)
 								            .unwrap()
 								            .unwrap()
 								            .into_iter()
 								            .collect();
 								        assert_eq!(docids_with_colour_green, vec![6]);
 								    }
-												Fix a documents indexing bug and add a test

											
										
										
											2020-10-30 12:14:25 +01:00
+								}