mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 13:24:27 +01:00
Merge pull request #183 from meilisearch/number-of-documents
Compute the number of documents on updates
This commit is contained in:
commit
aaeb25828f
@ -35,14 +35,16 @@ impl DocumentsIndex {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn del_all_document_fields(&self, id: DocumentId) -> RocksDbResult<()> {
|
pub fn del_all_document_fields(&self, id: DocumentId) -> RocksDbResult<usize> {
|
||||||
let (start, end) = document_fields_range(id);
|
let (start, end) = document_fields_range(id);
|
||||||
|
|
||||||
|
let mut count = 0;
|
||||||
for (key, _) in self.0.range(start, end)? {
|
for (key, _) in self.0.range(start, end)? {
|
||||||
self.0.remove(key)?;
|
self.0.remove(key)?;
|
||||||
|
count += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(count)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn document_fields(&self, id: DocumentId) -> RocksDbResult<DocumentFieldsIter> {
|
pub fn document_fields(&self, id: DocumentId) -> RocksDbResult<DocumentFieldsIter> {
|
||||||
@ -52,7 +54,7 @@ impl DocumentsIndex {
|
|||||||
Ok(DocumentFieldsIter(iter))
|
Ok(DocumentFieldsIter(iter))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn len(&self) -> RocksDbResult<usize> {
|
pub fn len(&self) -> RocksDbResult<u64> {
|
||||||
let mut last_document_id = None;
|
let mut last_document_id = None;
|
||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
|
|
||||||
|
@ -1,14 +1,16 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use std::convert::TryInto;
|
||||||
|
|
||||||
use meilidb_schema::Schema;
|
use meilidb_schema::Schema;
|
||||||
|
|
||||||
use crate::ranked_map::RankedMap;
|
use crate::ranked_map::RankedMap;
|
||||||
use crate::database::Error;
|
use crate::database::Error;
|
||||||
|
|
||||||
const SCHEMA_KEY: &str = "schema";
|
const SCHEMA_KEY: &str = "schema";
|
||||||
const WORDS_KEY: &str = "words";
|
const WORDS_KEY: &str = "words";
|
||||||
const SYNONYMS_KEY: &str = "synonyms";
|
const SYNONYMS_KEY: &str = "synonyms";
|
||||||
const RANKED_MAP_KEY: &str = "ranked-map";
|
const RANKED_MAP_KEY: &str = "ranked-map";
|
||||||
|
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct MainIndex(pub(crate) crate::CfTree);
|
pub struct MainIndex(pub(crate) crate::CfTree);
|
||||||
@ -79,4 +81,22 @@ impl MainIndex {
|
|||||||
self.0.insert(RANKED_MAP_KEY, bytes)?;
|
self.0.insert(RANKED_MAP_KEY, bytes)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn number_of_documents(&self) -> Result<u64, Error> {
|
||||||
|
match self.0.get(NUMBER_OF_DOCUMENTS_KEY)? {
|
||||||
|
Some(bytes) => {
|
||||||
|
let array = (*bytes).try_into().unwrap();
|
||||||
|
Ok(u64::from_be_bytes(array))
|
||||||
|
},
|
||||||
|
None => Ok(0),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_number_of_documents<F>(&self, f: F) -> Result<u64, Error>
|
||||||
|
where F: FnOnce(u64) -> u64,
|
||||||
|
{
|
||||||
|
let new = self.number_of_documents().map(f)?;
|
||||||
|
self.0.insert(NUMBER_OF_DOCUMENTS_KEY, new.to_be_bytes())?;
|
||||||
|
Ok(new)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -164,7 +164,7 @@ fn last_update_id(
|
|||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
pub struct IndexStats {
|
pub struct IndexStats {
|
||||||
pub number_of_words: usize,
|
pub number_of_words: usize,
|
||||||
pub number_of_documents: usize,
|
pub number_of_documents: u64,
|
||||||
pub number_attrs_in_ranked_map: usize,
|
pub number_attrs_in_ranked_map: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -192,6 +192,7 @@ pub(crate) struct Cache {
|
|||||||
pub synonyms: Arc<fst::Set>,
|
pub synonyms: Arc<fst::Set>,
|
||||||
pub schema: Schema,
|
pub schema: Schema,
|
||||||
pub ranked_map: RankedMap,
|
pub ranked_map: RankedMap,
|
||||||
|
pub number_of_documents: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Index {
|
impl Index {
|
||||||
@ -241,7 +242,9 @@ impl Index {
|
|||||||
None => RankedMap::default(),
|
None => RankedMap::default(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let cache = Cache { words, synonyms, schema, ranked_map };
|
let number_of_documents = documents_index.len()?;
|
||||||
|
|
||||||
|
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
|
||||||
let cache = Arc::new(ArcSwap::from_pointee(cache));
|
let cache = Arc::new(ArcSwap::from_pointee(cache));
|
||||||
|
|
||||||
let last_update_id = last_update_id(&updates_index, &updates_results_index)?;
|
let last_update_id = last_update_id(&updates_index, &updates_results_index)?;
|
||||||
@ -280,7 +283,7 @@ impl Index {
|
|||||||
let cache = self.cache.load();
|
let cache = self.cache.load();
|
||||||
Ok(IndexStats {
|
Ok(IndexStats {
|
||||||
number_of_words: cache.words.len(),
|
number_of_words: cache.words.len(),
|
||||||
number_of_documents: self.documents_index.len()?,
|
number_of_documents: cache.number_of_documents,
|
||||||
number_attrs_in_ranked_map: cache.ranked_map.len(),
|
number_attrs_in_ranked_map: cache.ranked_map.len(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -319,6 +322,10 @@ impl Index {
|
|||||||
self.custom_settings_index.clone()
|
self.custom_settings_index.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn number_of_documents(&self) -> u64 {
|
||||||
|
self.cache.load().number_of_documents
|
||||||
|
}
|
||||||
|
|
||||||
pub fn documents_addition<D>(&self) -> DocumentsAddition<D> {
|
pub fn documents_addition<D>(&self) -> DocumentsAddition<D> {
|
||||||
DocumentsAddition::new(self)
|
DocumentsAddition::new(self)
|
||||||
}
|
}
|
||||||
|
@ -5,11 +5,10 @@ use fst::{SetBuilder, set::OpBuilder};
|
|||||||
use sdset::{SetOperation, duo::Union};
|
use sdset::{SetOperation, duo::Union};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use crate::RankedMap;
|
||||||
|
use crate::database::{Error, Index, index::Cache, apply_documents_deletion};
|
||||||
use crate::indexer::Indexer;
|
use crate::indexer::Indexer;
|
||||||
use crate::serde::{extract_document_id, Serializer, RamDocumentStore};
|
use crate::serde::{extract_document_id, Serializer, RamDocumentStore};
|
||||||
use crate::RankedMap;
|
|
||||||
|
|
||||||
use crate::database::{Error, Index, index::Cache, apply_documents_deletion};
|
|
||||||
|
|
||||||
pub struct DocumentsAddition<'a, D> {
|
pub struct DocumentsAddition<'a, D> {
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
@ -73,8 +72,8 @@ pub fn apply_documents_addition(
|
|||||||
let words = ref_index.words_index;
|
let words = ref_index.words_index;
|
||||||
|
|
||||||
// 1. remove the previous documents match indexes
|
// 1. remove the previous documents match indexes
|
||||||
let document_ids = document_ids.into_iter().collect();
|
let documents_to_insert = document_ids.iter().cloned().collect();
|
||||||
apply_documents_deletion(index, ranked_map.clone(), document_ids)?;
|
apply_documents_deletion(index, ranked_map.clone(), documents_to_insert)?;
|
||||||
|
|
||||||
// 2. insert new document attributes in the database
|
// 2. insert new document attributes in the database
|
||||||
for ((id, attr), value) in document_store.into_inner() {
|
for ((id, attr), value) in document_store.into_inner() {
|
||||||
@ -124,13 +123,16 @@ pub fn apply_documents_addition(
|
|||||||
main.set_words_set(&words)?;
|
main.set_words_set(&words)?;
|
||||||
main.set_ranked_map(&ranked_map)?;
|
main.set_ranked_map(&ranked_map)?;
|
||||||
|
|
||||||
|
let inserted_documents_len = document_ids.len() as u64;
|
||||||
|
let number_of_documents = main.set_number_of_documents(|old| old + inserted_documents_len)?;
|
||||||
|
|
||||||
// update the "consistent" view of the Index
|
// update the "consistent" view of the Index
|
||||||
let cache = ref_index.cache;
|
let cache = ref_index.cache;
|
||||||
let words = Arc::new(words);
|
let words = Arc::new(words);
|
||||||
let synonyms = cache.synonyms.clone();
|
let synonyms = cache.synonyms.clone();
|
||||||
let schema = cache.schema.clone();
|
let schema = cache.schema.clone();
|
||||||
|
|
||||||
let cache = Cache { words, synonyms, schema, ranked_map };
|
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
|
||||||
index.cache.store(Arc::new(cache));
|
index.cache.store(Arc::new(cache));
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use std::collections::{HashMap, BTreeSet};
|
use std::collections::{HashMap, HashSet, BTreeSet};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use fst::{SetBuilder, Streamer};
|
use fst::{SetBuilder, Streamer};
|
||||||
@ -88,6 +88,7 @@ pub fn apply_documents_deletion(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut deleted_documents = HashSet::new();
|
||||||
let mut removed_words = BTreeSet::new();
|
let mut removed_words = BTreeSet::new();
|
||||||
for (word, document_ids) in words_document_ids {
|
for (word, document_ids) in words_document_ids {
|
||||||
let document_ids = SetBuf::from_dirty(document_ids);
|
let document_ids = SetBuf::from_dirty(document_ids);
|
||||||
@ -105,7 +106,9 @@ pub fn apply_documents_deletion(
|
|||||||
}
|
}
|
||||||
|
|
||||||
for id in document_ids {
|
for id in document_ids {
|
||||||
documents.del_all_document_fields(id)?;
|
if documents.del_all_document_fields(id)? != 0 {
|
||||||
|
deleted_documents.insert(id);
|
||||||
|
}
|
||||||
docs_words.del_doc_words(id)?;
|
docs_words.del_doc_words(id)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -131,13 +134,16 @@ pub fn apply_documents_deletion(
|
|||||||
main.set_words_set(&words)?;
|
main.set_words_set(&words)?;
|
||||||
main.set_ranked_map(&ranked_map)?;
|
main.set_ranked_map(&ranked_map)?;
|
||||||
|
|
||||||
|
let deleted_documents_len = deleted_documents.len() as u64;
|
||||||
|
let number_of_documents = main.set_number_of_documents(|old| old - deleted_documents_len)?;
|
||||||
|
|
||||||
// update the "consistent" view of the Index
|
// update the "consistent" view of the Index
|
||||||
let cache = ref_index.cache;
|
let cache = ref_index.cache;
|
||||||
let words = Arc::new(words);
|
let words = Arc::new(words);
|
||||||
let synonyms = cache.synonyms.clone();
|
let synonyms = cache.synonyms.clone();
|
||||||
let schema = cache.schema.clone();
|
let schema = cache.schema.clone();
|
||||||
|
|
||||||
let cache = Cache { words, synonyms, schema, ranked_map };
|
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
|
||||||
index.cache.store(Arc::new(cache));
|
index.cache.store(Arc::new(cache));
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -85,8 +85,9 @@ pub fn apply_synonyms_addition(
|
|||||||
let ranked_map = cache.ranked_map.clone();
|
let ranked_map = cache.ranked_map.clone();
|
||||||
let synonyms = Arc::new(synonyms);
|
let synonyms = Arc::new(synonyms);
|
||||||
let schema = cache.schema.clone();
|
let schema = cache.schema.clone();
|
||||||
|
let number_of_documents = cache.number_of_documents;
|
||||||
|
|
||||||
let cache = Cache { words, synonyms, schema, ranked_map };
|
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
|
||||||
index.cache.store(Arc::new(cache));
|
index.cache.store(Arc::new(cache));
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -128,8 +128,9 @@ pub fn apply_synonyms_deletion(
|
|||||||
let ranked_map = cache.ranked_map.clone();
|
let ranked_map = cache.ranked_map.clone();
|
||||||
let synonyms = Arc::new(synonyms);
|
let synonyms = Arc::new(synonyms);
|
||||||
let schema = cache.schema.clone();
|
let schema = cache.schema.clone();
|
||||||
|
let number_of_documents = cache.number_of_documents;
|
||||||
|
|
||||||
let cache = Cache { words, synonyms, schema, ranked_map };
|
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
|
||||||
index.cache.store(Arc::new(cache));
|
index.cache.store(Arc::new(cache));
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -33,6 +33,7 @@ fn insert_delete_document() {
|
|||||||
let status = index.update_status_blocking(update_id).unwrap();
|
let status = index.update_status_blocking(update_id).unwrap();
|
||||||
assert!(as_been_updated.swap(false, Relaxed));
|
assert!(as_been_updated.swap(false, Relaxed));
|
||||||
assert!(status.result.is_ok());
|
assert!(status.result.is_ok());
|
||||||
|
assert_eq!(index.number_of_documents(), 1);
|
||||||
|
|
||||||
let docs = index.query_builder().query("hello", 0..10).unwrap();
|
let docs = index.query_builder().query("hello", 0..10).unwrap();
|
||||||
assert_eq!(docs.len(), 1);
|
assert_eq!(docs.len(), 1);
|
||||||
@ -44,6 +45,7 @@ fn insert_delete_document() {
|
|||||||
let status = index.update_status_blocking(update_id).unwrap();
|
let status = index.update_status_blocking(update_id).unwrap();
|
||||||
assert!(as_been_updated.swap(false, Relaxed));
|
assert!(as_been_updated.swap(false, Relaxed));
|
||||||
assert!(status.result.is_ok());
|
assert!(status.result.is_ok());
|
||||||
|
assert_eq!(index.number_of_documents(), 0);
|
||||||
|
|
||||||
let docs = index.query_builder().query("hello", 0..10).unwrap();
|
let docs = index.query_builder().query("hello", 0..10).unwrap();
|
||||||
assert_eq!(docs.len(), 0);
|
assert_eq!(docs.len(), 0);
|
||||||
@ -71,17 +73,19 @@ fn replace_document() {
|
|||||||
let status = index.update_status_blocking(update_id).unwrap();
|
let status = index.update_status_blocking(update_id).unwrap();
|
||||||
assert!(as_been_updated.swap(false, Relaxed));
|
assert!(as_been_updated.swap(false, Relaxed));
|
||||||
assert!(status.result.is_ok());
|
assert!(status.result.is_ok());
|
||||||
|
assert_eq!(index.number_of_documents(), 1);
|
||||||
|
|
||||||
let docs = index.query_builder().query("hello", 0..10).unwrap();
|
let docs = index.query_builder().query("hello", 0..10).unwrap();
|
||||||
assert_eq!(docs.len(), 1);
|
assert_eq!(docs.len(), 1);
|
||||||
assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc1));
|
assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc1));
|
||||||
|
|
||||||
let mut deletion = index.documents_addition();
|
let mut addition = index.documents_addition();
|
||||||
deletion.update_document(&doc2);
|
addition.update_document(&doc2);
|
||||||
let update_id = deletion.finalize().unwrap();
|
let update_id = addition.finalize().unwrap();
|
||||||
let status = index.update_status_blocking(update_id).unwrap();
|
let status = index.update_status_blocking(update_id).unwrap();
|
||||||
assert!(as_been_updated.swap(false, Relaxed));
|
assert!(as_been_updated.swap(false, Relaxed));
|
||||||
assert!(status.result.is_ok());
|
assert!(status.result.is_ok());
|
||||||
|
assert_eq!(index.number_of_documents(), 1);
|
||||||
|
|
||||||
let docs = index.query_builder().query("hello", 0..10).unwrap();
|
let docs = index.query_builder().query("hello", 0..10).unwrap();
|
||||||
assert_eq!(docs.len(), 0);
|
assert_eq!(docs.len(), 0);
|
||||||
|
Loading…
Reference in New Issue
Block a user