mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Ensure that Index methods are not bypassed by Meilisearch
This commit is contained in:
parent
f1d848bb9a
commit
0d1d354052
@ -229,7 +229,7 @@ impl Performer for DocumentAddition {
|
|||||||
|
|
||||||
println!("Adding {} documents to the index.", reader.len());
|
println!("Adding {} documents to the index.", reader.len());
|
||||||
|
|
||||||
let mut txn = index.env.write_txn()?;
|
let mut txn = index.write_txn()?;
|
||||||
let config = milli::update::IndexerConfig { log_every_n: Some(100), ..Default::default() };
|
let config = milli::update::IndexerConfig { log_every_n: Some(100), ..Default::default() };
|
||||||
let update_method = if self.update_documents {
|
let update_method = if self.update_documents {
|
||||||
IndexDocumentsMethod::UpdateDocuments
|
IndexDocumentsMethod::UpdateDocuments
|
||||||
@ -424,7 +424,7 @@ impl Search {
|
|||||||
offset: &Option<usize>,
|
offset: &Option<usize>,
|
||||||
limit: &Option<usize>,
|
limit: &Option<usize>,
|
||||||
) -> Result<Vec<Map<String, Value>>> {
|
) -> Result<Vec<Map<String, Value>>> {
|
||||||
let txn = index.env.read_txn()?;
|
let txn = index.read_txn()?;
|
||||||
let mut search = index.search(&txn);
|
let mut search = index.search(&txn);
|
||||||
|
|
||||||
if let Some(ref query) = query {
|
if let Some(ref query) = query {
|
||||||
@ -475,7 +475,7 @@ struct SettingsUpdate {
|
|||||||
|
|
||||||
impl Performer for SettingsUpdate {
|
impl Performer for SettingsUpdate {
|
||||||
fn perform(self, index: milli::Index) -> Result<()> {
|
fn perform(self, index: milli::Index) -> Result<()> {
|
||||||
let mut txn = index.env.write_txn()?;
|
let mut txn = index.write_txn()?;
|
||||||
|
|
||||||
let config = IndexerConfig { log_every_n: Some(100), ..Default::default() };
|
let config = IndexerConfig { log_every_n: Some(100), ..Default::default() };
|
||||||
|
|
||||||
|
@ -371,11 +371,9 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
|||||||
use std::cmp::Reverse;
|
use std::cmp::Reverse;
|
||||||
use std::collections::BinaryHeap;
|
use std::collections::BinaryHeap;
|
||||||
|
|
||||||
use heed::types::{ByteSlice, Str};
|
use heed::types::ByteSlice;
|
||||||
|
|
||||||
let Index {
|
let Index {
|
||||||
env: _env,
|
|
||||||
main,
|
|
||||||
word_docids,
|
word_docids,
|
||||||
word_prefix_docids,
|
word_prefix_docids,
|
||||||
docid_word_positions,
|
docid_word_positions,
|
||||||
@ -390,7 +388,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
|||||||
exact_word_prefix_docids,
|
exact_word_prefix_docids,
|
||||||
field_id_docid_facet_f64s: _,
|
field_id_docid_facet_f64s: _,
|
||||||
field_id_docid_facet_strings: _,
|
field_id_docid_facet_strings: _,
|
||||||
documents,
|
..
|
||||||
} = index;
|
} = index;
|
||||||
|
|
||||||
let main_name = "main";
|
let main_name = "main";
|
||||||
@ -425,11 +423,10 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
|||||||
heap.pop();
|
heap.pop();
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(documents_ids) = main.get::<_, Str, ByteSlice>(rtxn, "documents-ids")? {
|
let documents_ids = index.documents_ids(rtxn)?;
|
||||||
heap.push(Reverse((documents_ids.len(), format!("documents-ids"), main_name)));
|
heap.push(Reverse((documents_ids.len() as usize, format!("documents-ids"), main_name)));
|
||||||
if heap.len() > limit {
|
if heap.len() > limit {
|
||||||
heap.pop();
|
heap.pop();
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for result in word_docids.remap_data_type::<ByteSlice>().iter(rtxn)? {
|
for result in word_docids.remap_data_type::<ByteSlice>().iter(rtxn)? {
|
||||||
@ -549,9 +546,10 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for result in documents.remap_data_type::<ByteSlice>().iter(rtxn)? {
|
for result in index.all_documents(rtxn)? {
|
||||||
let (id, value) = result?;
|
let (id, value) = result?;
|
||||||
heap.push(Reverse((value.len(), id.to_string(), documents_name)));
|
let size = value.iter().map(|(k, v)| k.to_ne_bytes().len() + v.len()).sum();
|
||||||
|
heap.push(Reverse((size, id.to_string(), documents_name)));
|
||||||
if heap.len() > limit {
|
if heap.len() > limit {
|
||||||
heap.pop();
|
heap.pop();
|
||||||
}
|
}
|
||||||
@ -877,7 +875,7 @@ fn export_documents(
|
|||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
use std::io::{BufWriter, Write as _};
|
use std::io::{BufWriter, Write as _};
|
||||||
|
|
||||||
use milli::{obkv_to_json, BEU32};
|
use milli::obkv_to_json;
|
||||||
|
|
||||||
let stdout = io::stdout();
|
let stdout = io::stdout();
|
||||||
let mut out = BufWriter::new(stdout);
|
let mut out = BufWriter::new(stdout);
|
||||||
@ -886,12 +884,13 @@ fn export_documents(
|
|||||||
let displayed_fields: Vec<_> = fields_ids_map.iter().map(|(id, _name)| id).collect();
|
let displayed_fields: Vec<_> = fields_ids_map.iter().map(|(id, _name)| id).collect();
|
||||||
|
|
||||||
let iter: Box<dyn Iterator<Item = _>> = if internal_ids.is_empty() {
|
let iter: Box<dyn Iterator<Item = _>> = if internal_ids.is_empty() {
|
||||||
Box::new(index.documents.iter(rtxn)?.map(|result| result.map(|(_id, obkv)| obkv)))
|
Box::new(index.all_documents(rtxn)?.map(|result| result.map(|(_id, obkv)| obkv)))
|
||||||
} else {
|
} else {
|
||||||
Box::new(
|
Box::new(
|
||||||
internal_ids
|
index
|
||||||
|
.documents(rtxn, internal_ids.into_iter())?
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.flat_map(|id| index.documents.get(rtxn, &BEU32::new(id)).transpose()),
|
.map(|(_id, obkv)| Ok(obkv)),
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -973,8 +972,6 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> a
|
|||||||
use heed::types::ByteSlice;
|
use heed::types::ByteSlice;
|
||||||
|
|
||||||
let Index {
|
let Index {
|
||||||
env: _env,
|
|
||||||
main,
|
|
||||||
word_docids,
|
word_docids,
|
||||||
word_prefix_docids,
|
word_prefix_docids,
|
||||||
docid_word_positions,
|
docid_word_positions,
|
||||||
@ -989,7 +986,7 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> a
|
|||||||
field_id_docid_facet_strings,
|
field_id_docid_facet_strings,
|
||||||
exact_word_prefix_docids,
|
exact_word_prefix_docids,
|
||||||
exact_word_docids,
|
exact_word_docids,
|
||||||
documents,
|
..
|
||||||
} = index;
|
} = index;
|
||||||
|
|
||||||
let names = if names.is_empty() {
|
let names = if names.is_empty() {
|
||||||
@ -1000,7 +997,6 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> a
|
|||||||
|
|
||||||
for name in names {
|
for name in names {
|
||||||
let database = match name.as_str() {
|
let database = match name.as_str() {
|
||||||
MAIN => &main,
|
|
||||||
WORD_PREFIX_DOCIDS => word_prefix_docids.as_polymorph(),
|
WORD_PREFIX_DOCIDS => word_prefix_docids.as_polymorph(),
|
||||||
WORD_DOCIDS => word_docids.as_polymorph(),
|
WORD_DOCIDS => word_docids.as_polymorph(),
|
||||||
DOCID_WORD_POSITIONS => docid_word_positions.as_polymorph(),
|
DOCID_WORD_POSITIONS => docid_word_positions.as_polymorph(),
|
||||||
@ -1016,7 +1012,6 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> a
|
|||||||
EXACT_WORD_DOCIDS => exact_word_docids.as_polymorph(),
|
EXACT_WORD_DOCIDS => exact_word_docids.as_polymorph(),
|
||||||
EXACT_WORD_PREFIX_DOCIDS => exact_word_prefix_docids.as_polymorph(),
|
EXACT_WORD_PREFIX_DOCIDS => exact_word_prefix_docids.as_polymorph(),
|
||||||
|
|
||||||
DOCUMENTS => documents.as_polymorph(),
|
|
||||||
unknown => anyhow::bail!("unknown database {:?}", unknown),
|
unknown => anyhow::bail!("unknown database {:?}", unknown),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -82,10 +82,10 @@ pub mod db_name {
|
|||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Index {
|
pub struct Index {
|
||||||
/// The LMDB environment which this index is associated with.
|
/// The LMDB environment which this index is associated with.
|
||||||
pub env: heed::Env,
|
pub(crate) env: heed::Env,
|
||||||
|
|
||||||
/// Contains many different types (e.g. the fields ids map).
|
/// Contains many different types (e.g. the fields ids map).
|
||||||
pub main: PolyDatabase,
|
pub(crate) main: PolyDatabase,
|
||||||
|
|
||||||
/// A word and all the documents ids containing the word.
|
/// A word and all the documents ids containing the word.
|
||||||
pub word_docids: Database<Str, RoaringBitmapCodec>,
|
pub word_docids: Database<Str, RoaringBitmapCodec>,
|
||||||
@ -125,7 +125,7 @@ pub struct Index {
|
|||||||
pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,
|
pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,
|
||||||
|
|
||||||
/// Maps the document id to the document as an obkv store.
|
/// Maps the document id to the document as an obkv store.
|
||||||
pub documents: Database<OwnedType<BEU32>, ObkvCodec>,
|
pub(crate) documents: Database<OwnedType<BEU32>, ObkvCodec>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Index {
|
impl Index {
|
||||||
|
Loading…
Reference in New Issue
Block a user