Merge pull request #116 from meilisearch/index-metadata

add index metadata
This commit is contained in:
Clément Renault 2021-03-15 14:20:50 +01:00 committed by GitHub
commit b7b23cd4a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 50 additions and 0 deletions

2
Cargo.lock generated
View File

@ -302,6 +302,7 @@ dependencies = [
"libc",
"num-integer",
"num-traits",
"serde",
"time",
"winapi 0.3.9",
]
@ -1277,6 +1278,7 @@ dependencies = [
"anyhow",
"bstr",
"byteorder",
"chrono",
"criterion",
"crossbeam-channel",
"csv",

View File

@ -8,6 +8,7 @@ edition = "2018"
anyhow = "1.0.38"
bstr = "0.2.15"
byteorder = "1.4.2"
chrono = { version = "0.4.19", features = ["serde"] }
crossbeam-channel = "0.5.0"
csv = "1.1.5"
either = "1.6.1"

View File

@ -6,6 +6,7 @@ use anyhow::Context;
use heed::types::*;
use heed::{PolyDatabase, Database, RwTxn, RoTxn};
use roaring::RoaringBitmap;
use chrono::{Utc, DateTime};
use crate::facet::FacetType;
use crate::fields_ids_map::FieldsIdsMap;
@ -28,6 +29,8 @@ pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
pub const WORDS_FST_KEY: &str = "words-fst";
pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
/// Key of the `main` database entry that stores the index creation time.
const CREATED_AT_KEY: &str = "created-at";
/// Key of the `main` database entry that stores the index last update time.
const UPDATED_AT_KEY: &str = "updated-at";
#[derive(Clone)]
pub struct Index {
@ -68,6 +71,17 @@ impl Index {
let field_id_docid_facet_values = env.create_database(Some("field-id-docid-facet-values"))?;
let documents = env.create_database(Some("documents"))?;
{
// A write transaction is opened up front so the timestamps can be initialised when missing.
let mut txn = env.write_txn()?;
// A missing `created-at` entry is taken to mean the db was just created: both the
// creation and last-update timestamps are set to the same instant and committed.
if main.get::<_, Str, SerdeJson<DateTime<Utc>>>(&txn, CREATED_AT_KEY)?.is_none() {
let now = Utc::now();
main.put::<_, Str, SerdeJson<DateTime<Utc>>>(&mut txn, UPDATED_AT_KEY, &now)?;
main.put::<_, Str, SerdeJson<DateTime<Utc>>>(&mut txn, CREATED_AT_KEY, &now)?;
txn.commit()?;
}
// When `created-at` already exists nothing is written and `txn` leaves scope uncommitted.
}
Ok(Index {
env,
main,
@ -128,6 +142,7 @@ impl Index {
/// Stores the documents primary key, i.e. the name of the field used to hold the id.
///
/// The index last-update time is refreshed first, using the same write transaction,
/// so an error while refreshing it prevents the primary key from being written.
pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> {
    let now = Utc::now();
    self.set_updated_at(wtxn, &now)?;
    self.main.put::<_, Str, Str>(wtxn, PRIMARY_KEY_KEY, primary_key)
}
@ -393,4 +408,24 @@ impl Index {
/// Builds a [`Search`] from the given read transaction and this index.
///
/// The returned value borrows both `self` and `rtxn` for the lifetime `'a`.
pub fn search<'a>(&'a self, rtxn: &'a RoTxn) -> Search<'a> {
Search::new(rtxn, self)
}
/// Returns the time at which the index was created.
///
/// # Errors
///
/// Propagates any error returned while reading the `main` database.
///
/// # Panics
///
/// Panics if the `main` database holds no creation time entry.
pub fn created_at(&self, rtxn: &RoTxn) -> heed::Result<DateTime<Utc>> {
    let stored = self.main.get::<_, Str, SerdeJson<DateTime<Utc>>>(rtxn, CREATED_AT_KEY)?;
    Ok(stored.expect("Index without creation time"))
}
/// Returns the time at which the index was last updated.
///
/// # Errors
///
/// Propagates any error returned while reading the `main` database.
///
/// # Panics
///
/// Panics if the `main` database holds no last-update time entry.
pub fn updated_at(&self, rtxn: &RoTxn) -> heed::Result<DateTime<Utc>> {
    let stored = self.main.get::<_, Str, SerdeJson<DateTime<Utc>>>(rtxn, UPDATED_AT_KEY)?;
    Ok(stored.expect("Index without update time"))
}
/// Overwrites the index last-update time with `time`, using the given write transaction.
///
/// The value is stored in the `main` database; nothing is committed here, that is left
/// to the owner of `wtxn`.
pub(crate) fn set_updated_at(&self, wtxn: &mut RwTxn, time: &DateTime<Utc>) -> heed::Result<()> {
    type Timestamp = SerdeJson<DateTime<Utc>>;
    self.main.put::<_, Str, Timestamp>(wtxn, UPDATED_AT_KEY, time)
}
}

View File

@ -1,3 +1,4 @@
use chrono::Utc;
use roaring::RoaringBitmap;
use crate::{ExternalDocumentsIds, Index};
@ -18,6 +19,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
}
pub fn execute(self) -> anyhow::Result<u64> {
self.index.set_updated_at(self.wtxn, &Utc::now())?;
let Index {
env: _env,
main: _main,

View File

@ -1,4 +1,5 @@
use anyhow::anyhow;
use chrono::Utc;
use fst::IntoStreamer;
use heed::types::ByteSlice;
use roaring::RoaringBitmap;
@ -52,6 +53,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
}
pub fn execute(self) -> anyhow::Result<u64> {
self.index.set_updated_at(self.wtxn, &Utc::now())?;
// We retrieve the current documents ids that are in the database.
let mut documents_ids = self.index.documents_ids(self.wtxn)?;

View File

@ -2,6 +2,7 @@ use std::cmp;
use std::fs::File;
use std::num::NonZeroUsize;
use chrono::Utc;
use grenad::{CompressionType, Reader, Writer, FileFuse};
use heed::types::{ByteSlice, DecodeIgnore};
use heed::{BytesEncode, Error};
@ -57,6 +58,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
}
pub fn execute(self) -> anyhow::Result<()> {
self.index.set_updated_at(self.wtxn, &Utc::now())?;
// We get the faceted fields to be able to create the facet levels.
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;

View File

@ -8,6 +8,7 @@ use std::time::Instant;
use anyhow::Context;
use bstr::ByteSlice as _;
use chrono::Utc;
use grenad::{MergerIter, Writer, Sorter, Merger, Reader, FileFuse, CompressionType};
use heed::types::ByteSlice;
use log::{debug, info, error};
@ -316,6 +317,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
R: io::Read,
F: Fn(UpdateIndexingStep, u64) + Sync,
{
self.index.set_updated_at(self.wtxn, &Utc::now())?;
let before_transform = Instant::now();
let update_id = self.update_id;
let progress_callback = |step| progress_callback(step, update_id);

View File

@ -2,6 +2,7 @@ use std::collections::HashMap;
use std::str::FromStr;
use anyhow::Context;
use chrono::Utc;
use grenad::CompressionType;
use itertools::Itertools;
use rayon::ThreadPool;
@ -249,6 +250,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
where
F: Fn(UpdateIndexingStep, u64) + Sync
{
self.index.set_updated_at(self.wtxn, &Utc::now())?;
let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?;
self.update_displayed()?;
let facets_updated = self.update_facets()?;

View File

@ -1,6 +1,7 @@
use std::iter::FromIterator;
use std::str;
use chrono::Utc;
use fst::automaton::Str;
use fst::{Automaton, Streamer, IntoStreamer};
use grenad::CompressionType;
@ -68,6 +69,7 @@ impl<'t, 'u, 'i> WordsPrefixes<'t, 'u, 'i> {
}
pub fn execute(self) -> anyhow::Result<()> {
self.index.set_updated_at(self.wtxn, &Utc::now())?;
// Clear the words prefixes datastructures.
self.index.word_prefix_docids.clear(self.wtxn)?;
self.index.word_prefix_pair_proximity_docids.clear(self.wtxn)?;