mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 13:24:27 +01:00
Reduce the amount of duplicated code to iterate over facet values
This commit is contained in:
parent
07a0c82790
commit
d40dd3e4da
@ -1,5 +1,5 @@
|
|||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::{str, io};
|
use std::{str, io, fmt};
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
@ -232,12 +232,17 @@ fn most_common_words(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyhow:
|
|||||||
Ok(wtr.flush()?)
|
Ok(wtr.flush()?)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn facet_values_iter<'txn, DC: 'txn>(
|
/// Helper function that converts the facet value key to a unique type
|
||||||
|
/// that can be used for logging or display purposes.
|
||||||
|
fn facet_values_iter<'txn, DC: 'txn, T>(
|
||||||
rtxn: &'txn heed::RoTxn,
|
rtxn: &'txn heed::RoTxn,
|
||||||
db: heed::Database<heed::types::ByteSlice, DC>,
|
db: heed::Database<heed::types::ByteSlice, DC>,
|
||||||
field_id: u8,
|
field_id: u8,
|
||||||
facet_type: crate::facet::FacetType,
|
facet_type: crate::facet::FacetType,
|
||||||
) -> heed::Result<Box<dyn Iterator<Item=heed::Result<(String, DC::DItem)>> + 'txn>>
|
string_fn: impl Fn(&str) -> T + 'txn,
|
||||||
|
float_fn: impl Fn(u8, f64, f64) -> T + 'txn,
|
||||||
|
integer_fn: impl Fn(u8, i64, i64) -> T + 'txn,
|
||||||
|
) -> heed::Result<Box<dyn Iterator<Item=heed::Result<(T, DC::DItem)>> + 'txn>>
|
||||||
where
|
where
|
||||||
DC: heed::BytesDecode<'txn>,
|
DC: heed::BytesDecode<'txn>,
|
||||||
{
|
{
|
||||||
@ -250,30 +255,34 @@ where
|
|||||||
match facet_type {
|
match facet_type {
|
||||||
FacetType::String => {
|
FacetType::String => {
|
||||||
let iter = iter.remap_key_type::<FacetValueStringCodec>()
|
let iter = iter.remap_key_type::<FacetValueStringCodec>()
|
||||||
.map(|r| r.map(|((_, key), value)| (key.to_string(), value)));
|
.map(move |r| r.map(|((_, key), value)| (string_fn(key), value)));
|
||||||
Ok(Box::new(iter) as Box<dyn Iterator<Item=_>>)
|
Ok(Box::new(iter) as Box<dyn Iterator<Item=_>>)
|
||||||
},
|
},
|
||||||
FacetType::Float => {
|
FacetType::Float => {
|
||||||
let iter = iter.remap_key_type::<FacetLevelValueF64Codec>()
|
let iter = iter.remap_key_type::<FacetLevelValueF64Codec>()
|
||||||
.map(|r| r.map(|((_, level, left, right), value)| if level == 0 {
|
.map(move |r| r.map(|((_, level, left, right), value)| {
|
||||||
(format!("{} (level {})", left, level), value)
|
(float_fn(level, left, right), value)
|
||||||
} else {
|
|
||||||
(format!("{} to {} (level {})", left, right, level), value)
|
|
||||||
}));
|
}));
|
||||||
Ok(Box::new(iter))
|
Ok(Box::new(iter))
|
||||||
},
|
},
|
||||||
FacetType::Integer => {
|
FacetType::Integer => {
|
||||||
let iter = iter.remap_key_type::<FacetLevelValueI64Codec>()
|
let iter = iter.remap_key_type::<FacetLevelValueI64Codec>()
|
||||||
.map(|r| r.map(|((_, level, left, right), value)| if level == 0 {
|
.map(move |r| r.map(|((_, level, left, right), value)| {
|
||||||
(format!("{} (level {})", left, level), value)
|
(integer_fn(level, left, right), value)
|
||||||
} else {
|
|
||||||
(format!("{} to {} (level {})", left, right, level), value)
|
|
||||||
}));
|
}));
|
||||||
Ok(Box::new(iter))
|
Ok(Box::new(iter))
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NOTE(review): the lone `|` / `||||||` lines inside this function look like
// side-by-side diff gutter residue from the page this text was captured from,
// not Rust source — confirm against the real file before compiling.
/// Renders a numeric facet key (`level`, `left`, `right`) as a human-readable string.
///
/// Both bounds are formatted with their `Debug` implementation (`{:?}`), which is
/// why `T` only needs to be `fmt::Debug`.
///
/// Returns `"<left> (level <level>)"` when `level` is 0, and
/// `"<left> to <right> (level <level>)"` for every other level.
fn facet_number_value_to_string<T: fmt::Debug>(level: u8, left: T, right: T) -> String {
|
||||||
|
if level == 0 {
|
||||||
|
// `right` is intentionally not printed at level 0.
// presumably level-0 entries hold a single value (left == right) — TODO confirm
format!("{:?} (level {})", left, level)
|
||||||
|
} else {
|
||||||
|
// Any non-zero level is printed as the `left to right` pair.
format!("{:?} to {:?} (level {})", left, right, level)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyhow::Result<()> {
|
fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyhow::Result<()> {
|
||||||
use std::cmp::Reverse;
|
use std::cmp::Reverse;
|
||||||
use std::collections::BinaryHeap;
|
use std::collections::BinaryHeap;
|
||||||
@ -334,7 +343,17 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
|||||||
let facet_name = fields_ids_map.name(field_id).unwrap();
|
let facet_name = fields_ids_map.name(field_id).unwrap();
|
||||||
|
|
||||||
let db = facet_field_id_value_docids.remap_data_type::<ByteSlice>();
|
let db = facet_field_id_value_docids.remap_data_type::<ByteSlice>();
|
||||||
for result in facet_values_iter(rtxn, db, field_id, field_type)? {
|
let iter = facet_values_iter(
|
||||||
|
rtxn,
|
||||||
|
db,
|
||||||
|
field_id,
|
||||||
|
field_type,
|
||||||
|
|key| key.to_owned(),
|
||||||
|
facet_number_value_to_string,
|
||||||
|
facet_number_value_to_string,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
for result in iter {
|
||||||
let (fvalue, value) = result?;
|
let (fvalue, value) = result?;
|
||||||
let key = format!("{} {}", facet_name, fvalue);
|
let key = format!("{} {}", facet_name, fvalue);
|
||||||
heap.push(Reverse((value.len(), key, facet_field_id_value_docids_name)));
|
heap.push(Reverse((value.len(), key, facet_field_id_value_docids_name)));
|
||||||
@ -393,7 +412,17 @@ fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_nam
|
|||||||
wtr.write_record(&["facet_value", "documents_ids"])?;
|
wtr.write_record(&["facet_value", "documents_ids"])?;
|
||||||
|
|
||||||
let db = index.facet_field_id_value_docids;
|
let db = index.facet_field_id_value_docids;
|
||||||
for result in facet_values_iter(rtxn, db, field_id, *field_type)? {
|
let iter = facet_values_iter(
|
||||||
|
rtxn,
|
||||||
|
db,
|
||||||
|
field_id,
|
||||||
|
*field_type,
|
||||||
|
|key| key.to_owned(),
|
||||||
|
facet_number_value_to_string,
|
||||||
|
facet_number_value_to_string,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
for result in iter {
|
||||||
let (value, docids) = result?;
|
let (value, docids) = result?;
|
||||||
let docids = if debug {
|
let docids = if debug {
|
||||||
format!("{:?}", docids)
|
format!("{:?}", docids)
|
||||||
@ -407,12 +436,6 @@ fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_nam
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn facet_stats(index: &Index, rtxn: &heed::RoTxn, field_name: String) -> anyhow::Result<()> {
|
fn facet_stats(index: &Index, rtxn: &heed::RoTxn, field_name: String) -> anyhow::Result<()> {
|
||||||
use heed::types::ByteSlice;
|
|
||||||
use crate::facet::FacetType;
|
|
||||||
use crate::heed_codec::facet::{
|
|
||||||
FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec,
|
|
||||||
};
|
|
||||||
|
|
||||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||||
let faceted_fields = index.faceted_fields(&rtxn)?;
|
let faceted_fields = index.faceted_fields(&rtxn)?;
|
||||||
|
|
||||||
@ -421,31 +444,23 @@ fn facet_stats(index: &Index, rtxn: &heed::RoTxn, field_name: String) -> anyhow:
|
|||||||
let field_type = faceted_fields.get(&field_id)
|
let field_type = faceted_fields.get(&field_id)
|
||||||
.with_context(|| format!("field {} is not faceted", field_name))?;
|
.with_context(|| format!("field {} is not faceted", field_name))?;
|
||||||
|
|
||||||
let iter = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[field_id])?;
|
let db = index.facet_field_id_value_docids;
|
||||||
let iter = match field_type {
|
let iter = facet_values_iter(
|
||||||
FacetType::String => {
|
rtxn,
|
||||||
let iter = iter.remap_types::<FacetValueStringCodec, ByteSlice>()
|
db,
|
||||||
.map(|r| r.map(|_| 0u8));
|
field_id,
|
||||||
Box::new(iter) as Box<dyn Iterator<Item=_>>
|
*field_type,
|
||||||
},
|
|_key| 0u8,
|
||||||
FacetType::Float => {
|
|level, _left, _right| level,
|
||||||
let iter = iter.remap_types::<FacetLevelValueF64Codec, ByteSlice>()
|
|level, _left, _right| level,
|
||||||
.map(|r| r.map(|((_, level, _, _), _)| level));
|
)?;
|
||||||
Box::new(iter)
|
|
||||||
},
|
|
||||||
FacetType::Integer => {
|
|
||||||
let iter = iter.remap_types::<FacetLevelValueI64Codec, ByteSlice>()
|
|
||||||
.map(|r| r.map(|((_, level, _, _), _)| level));
|
|
||||||
Box::new(iter)
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
println!("The database {:?} facet stats", field_name);
|
println!("The database {:?} facet stats", field_name);
|
||||||
|
|
||||||
let mut level_size = 0;
|
let mut level_size = 0;
|
||||||
let mut current_level = None;
|
let mut current_level = None;
|
||||||
for result in iter {
|
for result in iter {
|
||||||
let level = result?;
|
let (level, _) = result?;
|
||||||
if let Some(current) = current_level {
|
if let Some(current) = current_level {
|
||||||
if current != level {
|
if current != level {
|
||||||
println!("\tnumber of groups at level {}: {}", current, level_size);
|
println!("\tnumber of groups at level {}: {}", current, level_size);
|
||||||
|
Loading…
Reference in New Issue
Block a user