Ensure that Index methods are not bypassed by Meilisearch

ManyTheFish 2022-06-13 16:39:17 +02:00
parent f1d848bb9a
commit 0d1d354052
3 changed files with 21 additions and 26 deletions
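The change itself is mechanical: call sites stop reaching into index.env, index.main, and index.documents and instead go through Index methods (read_txn, write_txn, documents_ids, all_documents, documents), which is what lets those fields become pub(crate) so Meilisearch cannot bypass them. As a rough sketch of the transaction helpers the cli switches to, here is an illustrative impl block for the Index struct shown in the last file of this diff; the signatures are inferred from the call sites below, not copied from milli:

impl Index {
    /// Presumed wrapper the cli now calls instead of `index.env.read_txn()`.
    pub fn read_txn(&self) -> heed::Result<heed::RoTxn> {
        self.env.read_txn()
    }

    /// Presumed wrapper the cli now calls instead of `index.env.write_txn()`.
    pub fn write_txn(&self) -> heed::Result<heed::RwTxn> {
        self.env.write_txn()
    }
}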


@@ -229,7 +229,7 @@ impl Performer for DocumentAddition {
         println!("Adding {} documents to the index.", reader.len());
 
-        let mut txn = index.env.write_txn()?;
+        let mut txn = index.write_txn()?;
         let config = milli::update::IndexerConfig { log_every_n: Some(100), ..Default::default() };
         let update_method = if self.update_documents {
            IndexDocumentsMethod::UpdateDocuments
@@ -424,7 +424,7 @@ impl Search {
         offset: &Option<usize>,
         limit: &Option<usize>,
     ) -> Result<Vec<Map<String, Value>>> {
-        let txn = index.env.read_txn()?;
+        let txn = index.read_txn()?;
         let mut search = index.search(&txn);
 
         if let Some(ref query) = query {
@@ -475,7 +475,7 @@ struct SettingsUpdate {
 impl Performer for SettingsUpdate {
     fn perform(self, index: milli::Index) -> Result<()> {
-        let mut txn = index.env.write_txn()?;
+        let mut txn = index.write_txn()?;
         let config = IndexerConfig { log_every_n: Some(100), ..Default::default() };


@@ -371,11 +371,9 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
     use std::cmp::Reverse;
     use std::collections::BinaryHeap;
 
-    use heed::types::{ByteSlice, Str};
+    use heed::types::ByteSlice;
 
     let Index {
-        env: _env,
-        main,
         word_docids,
         word_prefix_docids,
         docid_word_positions,
@@ -390,7 +388,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
         exact_word_prefix_docids,
         field_id_docid_facet_f64s: _,
         field_id_docid_facet_strings: _,
-        documents,
+        ..
     } = index;
 
     let main_name = "main";
@@ -425,12 +423,11 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
         heap.pop();
     }
 
-    if let Some(documents_ids) = main.get::<_, Str, ByteSlice>(rtxn, "documents-ids")? {
-        heap.push(Reverse((documents_ids.len(), format!("documents-ids"), main_name)));
-        if heap.len() > limit {
-            heap.pop();
-        }
-    }
+    let documents_ids = index.documents_ids(rtxn)?;
+    heap.push(Reverse((documents_ids.len() as usize, format!("documents-ids"), main_name)));
+    if heap.len() > limit {
+        heap.pop();
+    }
 
     for result in word_docids.remap_data_type::<ByteSlice>().iter(rtxn)? {
         let (word, value) = result?;
@@ -549,9 +546,10 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
         }
     }
 
-    for result in documents.remap_data_type::<ByteSlice>().iter(rtxn)? {
+    for result in index.all_documents(rtxn)? {
         let (id, value) = result?;
-        heap.push(Reverse((value.len(), id.to_string(), documents_name)));
+        let size = value.iter().map(|(k, v)| k.to_ne_bytes().len() + v.len()).sum();
+        heap.push(Reverse((size, id.to_string(), documents_name)));
         if heap.len() > limit {
             heap.pop();
         }
@@ -877,7 +875,7 @@ fn export_documents(
 ) -> anyhow::Result<()> {
     use std::io::{BufWriter, Write as _};
 
-    use milli::{obkv_to_json, BEU32};
+    use milli::obkv_to_json;
 
     let stdout = io::stdout();
     let mut out = BufWriter::new(stdout);
@@ -886,12 +884,13 @@ fn export_documents(
     let displayed_fields: Vec<_> = fields_ids_map.iter().map(|(id, _name)| id).collect();
 
     let iter: Box<dyn Iterator<Item = _>> = if internal_ids.is_empty() {
-        Box::new(index.documents.iter(rtxn)?.map(|result| result.map(|(_id, obkv)| obkv)))
+        Box::new(index.all_documents(rtxn)?.map(|result| result.map(|(_id, obkv)| obkv)))
     } else {
         Box::new(
-            internal_ids
+            index
+                .documents(rtxn, internal_ids.into_iter())?
                 .into_iter()
-                .flat_map(|id| index.documents.get(rtxn, &BEU32::new(id)).transpose()),
+                .map(|(_id, obkv)| Ok(obkv)),
         )
     };
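The export path uses the same two accessors. Below is a hedged sketch of how a caller picks between them; the method names come straight from the hunk above, while the return and item types are assumptions based on how the call sites consume them.

use anyhow::Result;
use heed::RoTxn;
use milli::Index;

/// Sketch: walk either every document or only the requested internal ids,
/// going through Index methods rather than the crate-private `documents` database.
fn count_exported(index: &Index, rtxn: &RoTxn, internal_ids: Vec<u32>) -> Result<usize> {
    let mut count = 0;
    if internal_ids.is_empty() {
        // `all_documents` is assumed to yield `Result<(id, obkv)>` items, as consumed above.
        for result in index.all_documents(rtxn)? {
            let (_id, _obkv) = result?;
            count += 1;
        }
    } else {
        // `documents` is assumed to return the selected `(id, obkv)` pairs eagerly.
        for (_id, _obkv) in index.documents(rtxn, internal_ids.into_iter())? {
            count += 1;
        }
    }
    Ok(count)
}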
@@ -973,8 +972,6 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> a
     use heed::types::ByteSlice;
 
     let Index {
-        env: _env,
-        main,
         word_docids,
         word_prefix_docids,
         docid_word_positions,
@@ -989,7 +986,7 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> a
         field_id_docid_facet_strings,
         exact_word_prefix_docids,
         exact_word_docids,
-        documents,
+        ..
     } = index;
 
     let names = if names.is_empty() {
@@ -1000,7 +997,6 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> a
     for name in names {
         let database = match name.as_str() {
-            MAIN => &main,
             WORD_PREFIX_DOCIDS => word_prefix_docids.as_polymorph(),
             WORD_DOCIDS => word_docids.as_polymorph(),
             DOCID_WORD_POSITIONS => docid_word_positions.as_polymorph(),
@@ -1016,7 +1012,6 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> a
             EXACT_WORD_DOCIDS => exact_word_docids.as_polymorph(),
             EXACT_WORD_PREFIX_DOCIDS => exact_word_prefix_docids.as_polymorph(),
-            DOCUMENTS => documents.as_polymorph(),
             unknown => anyhow::bail!("unknown database {:?}", unknown),
         };


@@ -82,10 +82,10 @@ pub mod db_name {
 #[derive(Clone)]
 pub struct Index {
     /// The LMDB environment which this index is associated with.
-    pub env: heed::Env,
+    pub(crate) env: heed::Env,
 
     /// Contains many different types (e.g. the fields ids map).
-    pub main: PolyDatabase,
+    pub(crate) main: PolyDatabase,
 
     /// A word and all the documents ids containing the word.
     pub word_docids: Database<Str, RoaringBitmapCodec>,
@@ -125,7 +125,7 @@ pub struct Index {
     pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,
 
     /// Maps the document id to the document as an obkv store.
-    pub documents: Database<OwnedType<BEU32>, ObkvCodec>,
+    pub(crate) documents: Database<OwnedType<BEU32>, ObkvCodec>,
 }
 
 impl Index {
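With env, main, and documents now pub(crate), code outside the crate has to go through accessors like the ones used above. A minimal sketch of the kind of method this enforces; the "documents-ids" key comes from the old call site removed in the infos tool, while the import path, codec, and body are assumptions rather than milli's actual code:

use heed::types::Str;
use heed::RoTxn;
use roaring::RoaringBitmap;

use crate::heed_codec::RoaringBitmapCodec; // assumed path to milli's bitmap codec

impl Index {
    /// The internal ids of all documents in the index, read from the
    /// crate-private `main` database so callers never query it themselves.
    pub fn documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
        Ok(self
            .main
            .get::<_, Str, RoaringBitmapCodec>(rtxn, "documents-ids")?
            .unwrap_or_default())
    }
}

all_documents and documents play the same role for the crate-private documents database, so details such as the BEU32 key encoding seen in the old export code above no longer leak to callers.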