mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-27 07:14:26 +01:00
Order the facet values lexicographically
This commit is contained in:
parent
51a37de885
commit
4b9e81fc89
40
src/facet/facet_value.rs
Normal file
40
src/facet/facet_value.rs
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
use ordered_float::OrderedFloat;
|
||||||
|
use serde::{Serialize, Deserialize};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
pub enum FacetValue {
|
||||||
|
String(String),
|
||||||
|
Float(OrderedFloat<f64>),
|
||||||
|
Integer(i64),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<String> for FacetValue {
|
||||||
|
fn from(string: String) -> FacetValue {
|
||||||
|
FacetValue::String(string)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<&str> for FacetValue {
|
||||||
|
fn from(string: &str) -> FacetValue {
|
||||||
|
FacetValue::String(string.to_owned())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<f64> for FacetValue {
|
||||||
|
fn from(float: f64) -> FacetValue {
|
||||||
|
FacetValue::Float(OrderedFloat(float))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<OrderedFloat<f64>> for FacetValue {
|
||||||
|
fn from(float: OrderedFloat<f64>) -> FacetValue {
|
||||||
|
FacetValue::Float(float)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<i64> for FacetValue {
|
||||||
|
fn from(integer: i64) -> FacetValue {
|
||||||
|
FacetValue::Integer(integer)
|
||||||
|
}
|
||||||
|
}
|
@ -1,12 +1,10 @@
|
|||||||
use std::collections::{HashSet, HashMap};
|
use std::collections::{HashSet, BTreeSet, BTreeMap};
|
||||||
use std::{cmp, fmt};
|
|
||||||
use std::ops::Bound::Unbounded;
|
use std::ops::Bound::Unbounded;
|
||||||
|
use std::{cmp, fmt};
|
||||||
|
|
||||||
use ordered_float::OrderedFloat;
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde_json::Value;
|
|
||||||
|
|
||||||
use crate::facet::FacetType;
|
use crate::facet::{FacetType, FacetValue};
|
||||||
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec};
|
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec};
|
||||||
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec};
|
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec};
|
||||||
use crate::search::facet::{FacetIter, FacetRange};
|
use crate::search::facet::{FacetIter, FacetRange};
|
||||||
@ -40,13 +38,13 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
fn facet_values(&self, field_id: FieldId, facet_type: FacetType) -> heed::Result<Vec<Value>> {
|
fn facet_values(&self, field_id: FieldId, facet_type: FacetType) -> heed::Result<BTreeSet<FacetValue>> {
|
||||||
if let Some(candidates) = self.candidates.as_ref() {
|
if let Some(candidates) = self.candidates.as_ref() {
|
||||||
if candidates.len() <= 1000 {
|
if candidates.len() <= 1000 {
|
||||||
let mut key_buffer = vec![field_id];
|
let mut key_buffer = vec![field_id];
|
||||||
match facet_type {
|
match facet_type {
|
||||||
FacetType::Float => {
|
FacetType::Float => {
|
||||||
let mut facet_values = HashSet::new();
|
let mut facet_values = BTreeSet::new();
|
||||||
for docid in candidates {
|
for docid in candidates {
|
||||||
key_buffer.truncate(1);
|
key_buffer.truncate(1);
|
||||||
key_buffer.extend_from_slice(&docid.to_be_bytes());
|
key_buffer.extend_from_slice(&docid.to_be_bytes());
|
||||||
@ -55,13 +53,13 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
.remap_key_type::<FieldDocIdFacetF64Codec>();
|
.remap_key_type::<FieldDocIdFacetF64Codec>();
|
||||||
for result in iter {
|
for result in iter {
|
||||||
let ((_, _, value), ()) = result?;
|
let ((_, _, value), ()) = result?;
|
||||||
facet_values.insert(OrderedFloat(value));
|
facet_values.insert(FacetValue::from(value));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(facet_values.into_iter().map(|f| Value::from(*f)).collect())
|
Ok(facet_values)
|
||||||
},
|
},
|
||||||
FacetType::Integer => {
|
FacetType::Integer => {
|
||||||
let mut facet_values = HashSet::new();
|
let mut facet_values = BTreeSet::new();
|
||||||
for docid in candidates {
|
for docid in candidates {
|
||||||
key_buffer.truncate(1);
|
key_buffer.truncate(1);
|
||||||
key_buffer.extend_from_slice(&docid.to_be_bytes());
|
key_buffer.extend_from_slice(&docid.to_be_bytes());
|
||||||
@ -70,13 +68,13 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
.remap_key_type::<FieldDocIdFacetI64Codec>();
|
.remap_key_type::<FieldDocIdFacetI64Codec>();
|
||||||
for result in iter {
|
for result in iter {
|
||||||
let ((_, _, value), ()) = result?;
|
let ((_, _, value), ()) = result?;
|
||||||
facet_values.insert(value);
|
facet_values.insert(FacetValue::from(value));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(facet_values.into_iter().map(Value::from).collect())
|
Ok(facet_values)
|
||||||
},
|
},
|
||||||
FacetType::String => {
|
FacetType::String => {
|
||||||
let mut facet_values = HashSet::new();
|
let mut facet_values = BTreeSet::new();
|
||||||
for docid in candidates {
|
for docid in candidates {
|
||||||
key_buffer.truncate(1);
|
key_buffer.truncate(1);
|
||||||
key_buffer.extend_from_slice(&docid.to_be_bytes());
|
key_buffer.extend_from_slice(&docid.to_be_bytes());
|
||||||
@ -85,10 +83,10 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
.remap_key_type::<FieldDocIdFacetStringCodec>();
|
.remap_key_type::<FieldDocIdFacetStringCodec>();
|
||||||
for result in iter {
|
for result in iter {
|
||||||
let ((_, _, value), ()) = result?;
|
let ((_, _, value), ()) = result?;
|
||||||
facet_values.insert(value);
|
facet_values.insert(FacetValue::from(value));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(facet_values.into_iter().map(Value::from).collect())
|
Ok(facet_values)
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -98,28 +96,28 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
let iter = db
|
let iter = db
|
||||||
.prefix_iter(self.rtxn, &[field_id])?
|
.prefix_iter(self.rtxn, &[field_id])?
|
||||||
.remap_key_type::<FacetValueStringCodec>()
|
.remap_key_type::<FacetValueStringCodec>()
|
||||||
.map(|r| r.map(|((_, v), docids)| (Value::from(v), docids)));
|
.map(|r| r.map(|((_, v), docids)| (FacetValue::from(v), docids)));
|
||||||
Box::new(iter) as Box::<dyn Iterator<Item=_>>
|
Box::new(iter) as Box::<dyn Iterator<Item=_>>
|
||||||
},
|
},
|
||||||
FacetType::Integer => {
|
FacetType::Integer => {
|
||||||
let iter = FacetIter::<i64, FacetLevelValueI64Codec>::new_non_reducing(
|
let iter = FacetIter::<i64, FacetLevelValueI64Codec>::new_non_reducing(
|
||||||
self.rtxn, self.index, field_id, candidates.clone(),
|
self.rtxn, self.index, field_id, candidates.clone(),
|
||||||
)?;
|
)?;
|
||||||
Box::new(iter.map(|r| r.map(|(v, docids)| (Value::from(v), docids))))
|
Box::new(iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids))))
|
||||||
},
|
},
|
||||||
FacetType::Float => {
|
FacetType::Float => {
|
||||||
let iter = FacetIter::<f64, FacetLevelValueF64Codec>::new_non_reducing(
|
let iter = FacetIter::<f64, FacetLevelValueF64Codec>::new_non_reducing(
|
||||||
self.rtxn, self.index, field_id, candidates.clone(),
|
self.rtxn, self.index, field_id, candidates.clone(),
|
||||||
)?;
|
)?;
|
||||||
Box::new(iter.map(|r| r.map(|(v, docids)| (Value::from(v), docids))))
|
Box::new(iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids))))
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut facet_values = Vec::new();
|
let mut facet_values = BTreeSet::new();
|
||||||
for result in iter {
|
for result in iter {
|
||||||
let (value, docids) = result?;
|
let (value, docids) = result?;
|
||||||
if self.candidates.as_ref().map_or(true, |c| docids.is_disjoint(c)) {
|
if self.candidates.as_ref().map_or(true, |c| docids.is_disjoint(c)) {
|
||||||
facet_values.push(value);
|
facet_values.insert(value);
|
||||||
}
|
}
|
||||||
if facet_values.len() == self.max_values_by_facet {
|
if facet_values.len() == self.max_values_by_facet {
|
||||||
break;
|
break;
|
||||||
@ -135,7 +133,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
let iter = db
|
let iter = db
|
||||||
.prefix_iter(self.rtxn, &[field_id])?
|
.prefix_iter(self.rtxn, &[field_id])?
|
||||||
.remap_key_type::<FacetValueStringCodec>()
|
.remap_key_type::<FacetValueStringCodec>()
|
||||||
.map(|r| r.map(|((_, v), docids)| (Value::from(v), docids)));
|
.map(|r| r.map(|((_, v), docids)| (FacetValue::from(v), docids)));
|
||||||
Box::new(iter) as Box::<dyn Iterator<Item=_>>
|
Box::new(iter) as Box::<dyn Iterator<Item=_>>
|
||||||
},
|
},
|
||||||
FacetType::Integer => {
|
FacetType::Integer => {
|
||||||
@ -143,22 +141,22 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
let range = FacetRange::<i64, _>::new(
|
let range = FacetRange::<i64, _>::new(
|
||||||
self.rtxn, db, field_id, 0, Unbounded, Unbounded,
|
self.rtxn, db, field_id, 0, Unbounded, Unbounded,
|
||||||
)?;
|
)?;
|
||||||
Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (Value::from(v), docids))))
|
Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (FacetValue::from(v), docids))))
|
||||||
},
|
},
|
||||||
FacetType::Float => {
|
FacetType::Float => {
|
||||||
let db = db.remap_key_type::<FacetLevelValueF64Codec>();
|
let db = db.remap_key_type::<FacetLevelValueF64Codec>();
|
||||||
let range = FacetRange::<f64, _>::new(
|
let range = FacetRange::<f64, _>::new(
|
||||||
self.rtxn, db, field_id, 0, Unbounded, Unbounded,
|
self.rtxn, db, field_id, 0, Unbounded, Unbounded,
|
||||||
)?;
|
)?;
|
||||||
Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (Value::from(v), docids))))
|
Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (FacetValue::from(v), docids))))
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut facet_values = Vec::new();
|
let mut facet_values = BTreeSet::new();
|
||||||
for result in iter {
|
for result in iter {
|
||||||
let (value, docids) = result?;
|
let (value, docids) = result?;
|
||||||
if self.candidates.as_ref().map_or(true, |c| docids.is_disjoint(c)) {
|
if self.candidates.as_ref().map_or(true, |c| docids.is_disjoint(c)) {
|
||||||
facet_values.push(value);
|
facet_values.insert(value);
|
||||||
}
|
}
|
||||||
if facet_values.len() == self.max_values_by_facet {
|
if facet_values.len() == self.max_values_by_facet {
|
||||||
break;
|
break;
|
||||||
@ -169,7 +167,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(&self) -> heed::Result<HashMap<String, Vec<Value>>> {
|
pub fn execute(&self) -> heed::Result<BTreeMap<String, BTreeSet<FacetValue>>> {
|
||||||
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
||||||
let faceted_fields = self.index.faceted_fields(self.rtxn)?;
|
let faceted_fields = self.index.faceted_fields(self.rtxn)?;
|
||||||
let fields_ids: Vec<_> = match &self.facets {
|
let fields_ids: Vec<_> = match &self.facets {
|
||||||
@ -182,7 +180,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
None => faceted_fields.iter().map(|(id, t)| (*id, *t)).collect(),
|
None => faceted_fields.iter().map(|(id, t)| (*id, *t)).collect(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut facets_values = HashMap::new();
|
let mut facets_values = BTreeMap::new();
|
||||||
for (fid, ftype) in fields_ids {
|
for (fid, ftype) in fields_ids {
|
||||||
let facet_name = fields_ids_map.name(fid).unwrap();
|
let facet_name = fields_ids_map.name(fid).unwrap();
|
||||||
let values = self.facet_values(fid, ftype)?;
|
let values = self.facet_values(fid, ftype)?;
|
||||||
|
Loading…
Reference in New Issue
Block a user