mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 00:34:26 +01:00
Add database in infos crate
This commit is contained in:
parent
c701f8bf36
commit
b8e6db0feb
@ -23,6 +23,7 @@ const WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-pair-proximity-docids";
|
|||||||
const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-prefix-pair-proximity-docids";
|
const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-prefix-pair-proximity-docids";
|
||||||
const WORD_LEVEL_POSITION_DOCIDS_DB_NAME: &str = "word-level-position-docids";
|
const WORD_LEVEL_POSITION_DOCIDS_DB_NAME: &str = "word-level-position-docids";
|
||||||
const WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME: &str = "word-prefix-level-position-docids";
|
const WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME: &str = "word-prefix-level-position-docids";
|
||||||
|
const FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME: &str = "field-id-word-count-docids";
|
||||||
const FACET_ID_F64_DOCIDS_DB_NAME: &str = "facet-id-f64-docids";
|
const FACET_ID_F64_DOCIDS_DB_NAME: &str = "facet-id-f64-docids";
|
||||||
const FACET_ID_STRING_DOCIDS_DB_NAME: &str = "facet-id-string-docids";
|
const FACET_ID_STRING_DOCIDS_DB_NAME: &str = "facet-id-string-docids";
|
||||||
const FIELD_ID_DOCID_FACET_F64S_DB_NAME: &str = "field-id-docid-facet-f64s";
|
const FIELD_ID_DOCID_FACET_F64S_DB_NAME: &str = "field-id-docid-facet-f64s";
|
||||||
@ -39,6 +40,7 @@ const ALL_DATABASE_NAMES: &[&str] = &[
|
|||||||
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME,
|
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME,
|
||||||
WORD_LEVEL_POSITION_DOCIDS_DB_NAME,
|
WORD_LEVEL_POSITION_DOCIDS_DB_NAME,
|
||||||
WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME,
|
WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME,
|
||||||
|
FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME,
|
||||||
FACET_ID_F64_DOCIDS_DB_NAME,
|
FACET_ID_F64_DOCIDS_DB_NAME,
|
||||||
FACET_ID_STRING_DOCIDS_DB_NAME,
|
FACET_ID_STRING_DOCIDS_DB_NAME,
|
||||||
FIELD_ID_DOCID_FACET_F64S_DB_NAME,
|
FIELD_ID_DOCID_FACET_F64S_DB_NAME,
|
||||||
@ -155,6 +157,15 @@ enum Command {
|
|||||||
prefixes: Vec<String>,
|
prefixes: Vec<String>,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
FieldIdWordCountDocids {
|
||||||
|
/// Display the whole documents ids in details.
|
||||||
|
#[structopt(long)]
|
||||||
|
full_display: bool,
|
||||||
|
|
||||||
|
/// The field name in the document.
|
||||||
|
field_name: String,
|
||||||
|
},
|
||||||
|
|
||||||
/// Outputs a CSV with the documents ids, words and the positions where this word appears.
|
/// Outputs a CSV with the documents ids, words and the positions where this word appears.
|
||||||
DocidsWordsPositions {
|
DocidsWordsPositions {
|
||||||
/// Display the whole positions in detail.
|
/// Display the whole positions in detail.
|
||||||
@ -271,6 +282,9 @@ fn main() -> anyhow::Result<()> {
|
|||||||
WordPrefixesLevelPositionsDocids { full_display, prefixes } => {
|
WordPrefixesLevelPositionsDocids { full_display, prefixes } => {
|
||||||
word_prefixes_level_positions_docids(&index, &rtxn, !full_display, prefixes)
|
word_prefixes_level_positions_docids(&index, &rtxn, !full_display, prefixes)
|
||||||
},
|
},
|
||||||
|
FieldIdWordCountDocids { full_display, field_name } => {
|
||||||
|
field_id_word_count_docids(&index, &rtxn, !full_display, field_name)
|
||||||
|
},
|
||||||
DocidsWordsPositions { full_display, internal_documents_ids } => {
|
DocidsWordsPositions { full_display, internal_documents_ids } => {
|
||||||
docids_words_positions(&index, &rtxn, !full_display, internal_documents_ids)
|
docids_words_positions(&index, &rtxn, !full_display, internal_documents_ids)
|
||||||
},
|
},
|
||||||
@ -357,6 +371,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
|||||||
word_prefix_pair_proximity_docids,
|
word_prefix_pair_proximity_docids,
|
||||||
word_level_position_docids,
|
word_level_position_docids,
|
||||||
word_prefix_level_position_docids,
|
word_prefix_level_position_docids,
|
||||||
|
field_id_word_count_docids,
|
||||||
facet_id_f64_docids,
|
facet_id_f64_docids,
|
||||||
facet_id_string_docids,
|
facet_id_string_docids,
|
||||||
field_id_docid_facet_f64s: _,
|
field_id_docid_facet_f64s: _,
|
||||||
@ -372,6 +387,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
|||||||
let word_pair_proximity_docids_name = "word_pair_proximity_docids";
|
let word_pair_proximity_docids_name = "word_pair_proximity_docids";
|
||||||
let word_level_position_docids_name = "word_level_position_docids";
|
let word_level_position_docids_name = "word_level_position_docids";
|
||||||
let word_prefix_level_position_docids_name = "word_prefix_level_position_docids";
|
let word_prefix_level_position_docids_name = "word_prefix_level_position_docids";
|
||||||
|
let field_id_word_count_docids_name = "field_id_word_count_docids";
|
||||||
let facet_id_f64_docids_name = "facet_id_f64_docids";
|
let facet_id_f64_docids_name = "facet_id_f64_docids";
|
||||||
let facet_id_string_docids_name = "facet_id_string_docids";
|
let facet_id_string_docids_name = "facet_id_string_docids";
|
||||||
let documents_name = "documents";
|
let documents_name = "documents";
|
||||||
@ -443,6 +459,13 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
|||||||
if heap.len() > limit { heap.pop(); }
|
if heap.len() > limit { heap.pop(); }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for result in field_id_word_count_docids.remap_data_type::<ByteSlice>().iter(rtxn)? {
|
||||||
|
let ((field_id, word_count), docids) = result?;
|
||||||
|
let key = format!("{} {}", field_id, word_count);
|
||||||
|
heap.push(Reverse((docids.len(), key, field_id_word_count_docids_name)));
|
||||||
|
if heap.len() > limit { heap.pop(); }
|
||||||
|
}
|
||||||
|
|
||||||
let faceted_fields = index.faceted_fields_ids(rtxn)?;
|
let faceted_fields = index.faceted_fields_ids(rtxn)?;
|
||||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
|
|
||||||
@ -676,6 +699,39 @@ fn word_prefixes_level_positions_docids(
|
|||||||
Ok(wtr.flush()?)
|
Ok(wtr.flush()?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn field_id_word_count_docids(
|
||||||
|
index: &Index,
|
||||||
|
rtxn: &heed::RoTxn,
|
||||||
|
debug: bool,
|
||||||
|
field_name: String
|
||||||
|
) -> anyhow::Result<()>
|
||||||
|
{
|
||||||
|
let stdout = io::stdout();
|
||||||
|
let mut wtr = csv::Writer::from_writer(stdout.lock());
|
||||||
|
wtr.write_record(&["field_name", "word_count", "docids"])?;
|
||||||
|
|
||||||
|
let field_id = index.fields_ids_map(rtxn)?
|
||||||
|
.id(&field_name)
|
||||||
|
.with_context(|| format!("unknown field name: {}", &field_name))?;
|
||||||
|
|
||||||
|
let left = (field_id, 1);
|
||||||
|
let right = (field_id, 11);
|
||||||
|
let iter = index.field_id_word_count_docids
|
||||||
|
.range(rtxn, &(left..=right))?;
|
||||||
|
|
||||||
|
for result in iter {
|
||||||
|
let ((_, word_count), docids) = result?;
|
||||||
|
let docids = if debug {
|
||||||
|
format!("{:?}", docids)
|
||||||
|
} else {
|
||||||
|
format!("{:?}", docids.iter().collect::<Vec<_>>())
|
||||||
|
};
|
||||||
|
wtr.write_record(&[&field_name, &format!("{}", word_count), &docids])?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(wtr.flush()?)
|
||||||
|
}
|
||||||
|
|
||||||
fn docids_words_positions(
|
fn docids_words_positions(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
rtxn: &heed::RoTxn,
|
rtxn: &heed::RoTxn,
|
||||||
@ -870,6 +926,7 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> a
|
|||||||
word_prefix_pair_proximity_docids,
|
word_prefix_pair_proximity_docids,
|
||||||
word_level_position_docids,
|
word_level_position_docids,
|
||||||
word_prefix_level_position_docids,
|
word_prefix_level_position_docids,
|
||||||
|
field_id_word_count_docids,
|
||||||
facet_id_f64_docids,
|
facet_id_f64_docids,
|
||||||
facet_id_string_docids,
|
facet_id_string_docids,
|
||||||
field_id_docid_facet_f64s,
|
field_id_docid_facet_f64s,
|
||||||
@ -893,6 +950,7 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> a
|
|||||||
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME => word_prefix_pair_proximity_docids.as_polymorph(),
|
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME => word_prefix_pair_proximity_docids.as_polymorph(),
|
||||||
WORD_LEVEL_POSITION_DOCIDS_DB_NAME => word_level_position_docids.as_polymorph(),
|
WORD_LEVEL_POSITION_DOCIDS_DB_NAME => word_level_position_docids.as_polymorph(),
|
||||||
WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME => word_prefix_level_position_docids.as_polymorph(),
|
WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME => word_prefix_level_position_docids.as_polymorph(),
|
||||||
|
FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME => field_id_word_count_docids.as_polymorph(),
|
||||||
FACET_ID_F64_DOCIDS_DB_NAME => facet_id_f64_docids.as_polymorph(),
|
FACET_ID_F64_DOCIDS_DB_NAME => facet_id_f64_docids.as_polymorph(),
|
||||||
FACET_ID_STRING_DOCIDS_DB_NAME => facet_id_string_docids.as_polymorph(),
|
FACET_ID_STRING_DOCIDS_DB_NAME => facet_id_string_docids.as_polymorph(),
|
||||||
FIELD_ID_DOCID_FACET_F64S_DB_NAME => field_id_docid_facet_f64s.as_polymorph(),
|
FIELD_ID_DOCID_FACET_F64S_DB_NAME => field_id_docid_facet_f64s.as_polymorph(),
|
||||||
@ -999,6 +1057,10 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu
|
|||||||
let db = index.word_prefix_pair_proximity_docids.as_polymorph();
|
let db = index.word_prefix_pair_proximity_docids.as_polymorph();
|
||||||
compute_stats::<CboRoaringBitmapCodec>(*db, rtxn, name)
|
compute_stats::<CboRoaringBitmapCodec>(*db, rtxn, name)
|
||||||
},
|
},
|
||||||
|
FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME => {
|
||||||
|
let db = index.field_id_word_count_docids.as_polymorph();
|
||||||
|
compute_stats::<CboRoaringBitmapCodec>(*db, rtxn, name)
|
||||||
|
},
|
||||||
unknown => anyhow::bail!("unknown database {:?}", unknown),
|
unknown => anyhow::bail!("unknown database {:?}", unknown),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user