mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-24 21:50:07 +01:00
Update infos crate
This commit is contained in:
parent
3296bb243c
commit
5ed75de0db
@ -7,7 +7,7 @@ use byte_unit::Byte;
|
|||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use milli::facet::FacetType;
|
use milli::facet::FacetType;
|
||||||
use milli::index::db_name::*;
|
use milli::index::db_name::*;
|
||||||
use milli::{FieldId, Index, TreeLevel};
|
use milli::{FieldId, Index};
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
use Command::*;
|
use Command::*;
|
||||||
|
|
||||||
@ -22,8 +22,8 @@ const ALL_DATABASE_NAMES: &[&str] = &[
|
|||||||
DOCID_WORD_POSITIONS,
|
DOCID_WORD_POSITIONS,
|
||||||
WORD_PAIR_PROXIMITY_DOCIDS,
|
WORD_PAIR_PROXIMITY_DOCIDS,
|
||||||
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS,
|
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS,
|
||||||
WORD_LEVEL_POSITION_DOCIDS,
|
WORD_POSITION_DOCIDS,
|
||||||
WORD_PREFIX_LEVEL_POSITION_DOCIDS,
|
WORD_PREFIX_POSITION_DOCIDS,
|
||||||
FIELD_ID_WORD_COUNT_DOCIDS,
|
FIELD_ID_WORD_COUNT_DOCIDS,
|
||||||
FACET_ID_F64_DOCIDS,
|
FACET_ID_F64_DOCIDS,
|
||||||
FACET_ID_STRING_DOCIDS,
|
FACET_ID_STRING_DOCIDS,
|
||||||
@ -281,10 +281,10 @@ fn main() -> anyhow::Result<()> {
|
|||||||
facet_values_docids(&index, &rtxn, !full_display, FacetType::String, field_name)
|
facet_values_docids(&index, &rtxn, !full_display, FacetType::String, field_name)
|
||||||
}
|
}
|
||||||
WordsLevelPositionsDocids { full_display, words } => {
|
WordsLevelPositionsDocids { full_display, words } => {
|
||||||
words_level_positions_docids(&index, &rtxn, !full_display, words)
|
words_positions_docids(&index, &rtxn, !full_display, words)
|
||||||
}
|
}
|
||||||
WordPrefixesLevelPositionsDocids { full_display, prefixes } => {
|
WordPrefixesLevelPositionsDocids { full_display, prefixes } => {
|
||||||
word_prefixes_level_positions_docids(&index, &rtxn, !full_display, prefixes)
|
word_prefixes_positions_docids(&index, &rtxn, !full_display, prefixes)
|
||||||
}
|
}
|
||||||
FieldIdWordCountDocids { full_display, field_name } => {
|
FieldIdWordCountDocids { full_display, field_name } => {
|
||||||
field_id_word_count_docids(&index, &rtxn, !full_display, field_name)
|
field_id_word_count_docids(&index, &rtxn, !full_display, field_name)
|
||||||
@ -379,8 +379,8 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
|||||||
docid_word_positions,
|
docid_word_positions,
|
||||||
word_pair_proximity_docids,
|
word_pair_proximity_docids,
|
||||||
word_prefix_pair_proximity_docids,
|
word_prefix_pair_proximity_docids,
|
||||||
word_level_position_docids,
|
word_position_docids,
|
||||||
word_prefix_level_position_docids,
|
word_prefix_position_docids,
|
||||||
field_id_word_count_docids,
|
field_id_word_count_docids,
|
||||||
facet_id_f64_docids,
|
facet_id_f64_docids,
|
||||||
facet_id_string_docids,
|
facet_id_string_docids,
|
||||||
@ -395,8 +395,8 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
|||||||
let docid_word_positions_name = "docid_word_positions";
|
let docid_word_positions_name = "docid_word_positions";
|
||||||
let word_prefix_pair_proximity_docids_name = "word_prefix_pair_proximity_docids";
|
let word_prefix_pair_proximity_docids_name = "word_prefix_pair_proximity_docids";
|
||||||
let word_pair_proximity_docids_name = "word_pair_proximity_docids";
|
let word_pair_proximity_docids_name = "word_pair_proximity_docids";
|
||||||
let word_level_position_docids_name = "word_level_position_docids";
|
let word_position_docids_name = "word_position_docids";
|
||||||
let word_prefix_level_position_docids_name = "word_prefix_level_position_docids";
|
let word_prefix_position_docids_name = "word_prefix_position_docids";
|
||||||
let field_id_word_count_docids_name = "field_id_word_count_docids";
|
let field_id_word_count_docids_name = "field_id_word_count_docids";
|
||||||
let facet_id_f64_docids_name = "facet_id_f64_docids";
|
let facet_id_f64_docids_name = "facet_id_f64_docids";
|
||||||
let facet_id_string_docids_name = "facet_id_string_docids";
|
let facet_id_string_docids_name = "facet_id_string_docids";
|
||||||
@ -471,19 +471,19 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for result in word_level_position_docids.remap_data_type::<ByteSlice>().iter(rtxn)? {
|
for result in word_position_docids.remap_data_type::<ByteSlice>().iter(rtxn)? {
|
||||||
let ((word, level, left, right), value) = result?;
|
let ((word, pos), value) = result?;
|
||||||
let key = format!("{} {} {:?}", word, level, left..=right);
|
let key = format!("{} {}", word, pos);
|
||||||
heap.push(Reverse((value.len(), key, word_level_position_docids_name)));
|
heap.push(Reverse((value.len(), key, word_position_docids_name)));
|
||||||
if heap.len() > limit {
|
if heap.len() > limit {
|
||||||
heap.pop();
|
heap.pop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for result in word_prefix_level_position_docids.remap_data_type::<ByteSlice>().iter(rtxn)? {
|
for result in word_prefix_position_docids.remap_data_type::<ByteSlice>().iter(rtxn)? {
|
||||||
let ((word, level, left, right), value) = result?;
|
let ((word, pos), value) = result?;
|
||||||
let key = format!("{} {} {:?}", word, level, left..=right);
|
let key = format!("{} {}", word, pos);
|
||||||
heap.push(Reverse((value.len(), key, word_prefix_level_position_docids_name)));
|
heap.push(Reverse((value.len(), key, word_prefix_position_docids_name)));
|
||||||
if heap.len() > limit {
|
if heap.len() > limit {
|
||||||
heap.pop();
|
heap.pop();
|
||||||
}
|
}
|
||||||
@ -663,7 +663,7 @@ fn facet_values_docids(
|
|||||||
Ok(wtr.flush()?)
|
Ok(wtr.flush()?)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn words_level_positions_docids(
|
fn words_positions_docids(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
rtxn: &heed::RoTxn,
|
rtxn: &heed::RoTxn,
|
||||||
debug: bool,
|
debug: bool,
|
||||||
@ -671,16 +671,16 @@ fn words_level_positions_docids(
|
|||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let stdout = io::stdout();
|
let stdout = io::stdout();
|
||||||
let mut wtr = csv::Writer::from_writer(stdout.lock());
|
let mut wtr = csv::Writer::from_writer(stdout.lock());
|
||||||
wtr.write_record(&["word", "level", "positions", "documents_count", "documents_ids"])?;
|
wtr.write_record(&["word", "position", "documents_count", "documents_ids"])?;
|
||||||
|
|
||||||
for word in words.iter().map(AsRef::as_ref) {
|
for word in words.iter().map(AsRef::as_ref) {
|
||||||
let range = {
|
let range = {
|
||||||
let left = (word, TreeLevel::min_value(), u32::min_value(), u32::min_value());
|
let left = (word, u32::min_value());
|
||||||
let right = (word, TreeLevel::max_value(), u32::max_value(), u32::max_value());
|
let right = (word, u32::max_value());
|
||||||
left..=right
|
left..=right
|
||||||
};
|
};
|
||||||
for result in index.word_level_position_docids.range(rtxn, &range)? {
|
for result in index.word_position_docids.range(rtxn, &range)? {
|
||||||
let ((w, level, left, right), docids) = result?;
|
let ((w, pos), docids) = result?;
|
||||||
|
|
||||||
let count = docids.len().to_string();
|
let count = docids.len().to_string();
|
||||||
let docids = if debug {
|
let docids = if debug {
|
||||||
@ -688,20 +688,15 @@ fn words_level_positions_docids(
|
|||||||
} else {
|
} else {
|
||||||
format!("{:?}", docids.iter().collect::<Vec<_>>())
|
format!("{:?}", docids.iter().collect::<Vec<_>>())
|
||||||
};
|
};
|
||||||
let position_range = if level == TreeLevel::min_value() {
|
let position = format!("{:?}", pos);
|
||||||
format!("{:?}", left)
|
wtr.write_record(&[w, &position, &count, &docids])?;
|
||||||
} else {
|
|
||||||
format!("{:?}", left..=right)
|
|
||||||
};
|
|
||||||
let level = level.to_string();
|
|
||||||
wtr.write_record(&[w, &level, &position_range, &count, &docids])?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(wtr.flush()?)
|
Ok(wtr.flush()?)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn word_prefixes_level_positions_docids(
|
fn word_prefixes_positions_docids(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
rtxn: &heed::RoTxn,
|
rtxn: &heed::RoTxn,
|
||||||
debug: bool,
|
debug: bool,
|
||||||
@ -709,16 +704,16 @@ fn word_prefixes_level_positions_docids(
|
|||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let stdout = io::stdout();
|
let stdout = io::stdout();
|
||||||
let mut wtr = csv::Writer::from_writer(stdout.lock());
|
let mut wtr = csv::Writer::from_writer(stdout.lock());
|
||||||
wtr.write_record(&["prefix", "level", "positions", "documents_count", "documents_ids"])?;
|
wtr.write_record(&["prefix", "position", "documents_count", "documents_ids"])?;
|
||||||
|
|
||||||
for word in prefixes.iter().map(AsRef::as_ref) {
|
for word in prefixes.iter().map(AsRef::as_ref) {
|
||||||
let range = {
|
let range = {
|
||||||
let left = (word, TreeLevel::min_value(), u32::min_value(), u32::min_value());
|
let left = (word, u32::min_value());
|
||||||
let right = (word, TreeLevel::max_value(), u32::max_value(), u32::max_value());
|
let right = (word, u32::max_value());
|
||||||
left..=right
|
left..=right
|
||||||
};
|
};
|
||||||
for result in index.word_prefix_level_position_docids.range(rtxn, &range)? {
|
for result in index.word_prefix_position_docids.range(rtxn, &range)? {
|
||||||
let ((w, level, left, right), docids) = result?;
|
let ((w, pos), docids) = result?;
|
||||||
|
|
||||||
let count = docids.len().to_string();
|
let count = docids.len().to_string();
|
||||||
let docids = if debug {
|
let docids = if debug {
|
||||||
@ -726,13 +721,8 @@ fn word_prefixes_level_positions_docids(
|
|||||||
} else {
|
} else {
|
||||||
format!("{:?}", docids.iter().collect::<Vec<_>>())
|
format!("{:?}", docids.iter().collect::<Vec<_>>())
|
||||||
};
|
};
|
||||||
let position_range = if level == TreeLevel::min_value() {
|
let position = format!("{:?}", pos);
|
||||||
format!("{:?}", left)
|
wtr.write_record(&[w, &position, &count, &docids])?;
|
||||||
} else {
|
|
||||||
format!("{:?}", left..=right)
|
|
||||||
};
|
|
||||||
let level = level.to_string();
|
|
||||||
wtr.write_record(&[w, &level, &position_range, &count, &docids])?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -970,8 +960,8 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> a
|
|||||||
docid_word_positions,
|
docid_word_positions,
|
||||||
word_pair_proximity_docids,
|
word_pair_proximity_docids,
|
||||||
word_prefix_pair_proximity_docids,
|
word_prefix_pair_proximity_docids,
|
||||||
word_level_position_docids,
|
word_position_docids,
|
||||||
word_prefix_level_position_docids,
|
word_prefix_position_docids,
|
||||||
field_id_word_count_docids,
|
field_id_word_count_docids,
|
||||||
facet_id_f64_docids,
|
facet_id_f64_docids,
|
||||||
facet_id_string_docids,
|
facet_id_string_docids,
|
||||||
@ -994,8 +984,8 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> a
|
|||||||
DOCID_WORD_POSITIONS => docid_word_positions.as_polymorph(),
|
DOCID_WORD_POSITIONS => docid_word_positions.as_polymorph(),
|
||||||
WORD_PAIR_PROXIMITY_DOCIDS => word_pair_proximity_docids.as_polymorph(),
|
WORD_PAIR_PROXIMITY_DOCIDS => word_pair_proximity_docids.as_polymorph(),
|
||||||
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS => word_prefix_pair_proximity_docids.as_polymorph(),
|
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS => word_prefix_pair_proximity_docids.as_polymorph(),
|
||||||
WORD_LEVEL_POSITION_DOCIDS => word_level_position_docids.as_polymorph(),
|
WORD_POSITION_DOCIDS => word_position_docids.as_polymorph(),
|
||||||
WORD_PREFIX_LEVEL_POSITION_DOCIDS => word_prefix_level_position_docids.as_polymorph(),
|
WORD_PREFIX_POSITION_DOCIDS => word_prefix_position_docids.as_polymorph(),
|
||||||
FIELD_ID_WORD_COUNT_DOCIDS => field_id_word_count_docids.as_polymorph(),
|
FIELD_ID_WORD_COUNT_DOCIDS => field_id_word_count_docids.as_polymorph(),
|
||||||
FACET_ID_F64_DOCIDS => facet_id_f64_docids.as_polymorph(),
|
FACET_ID_F64_DOCIDS => facet_id_f64_docids.as_polymorph(),
|
||||||
FACET_ID_STRING_DOCIDS => facet_id_string_docids.as_polymorph(),
|
FACET_ID_STRING_DOCIDS => facet_id_string_docids.as_polymorph(),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user