mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-27 07:14:26 +01:00
Introduce a struct to compute facets values
This commit is contained in:
parent
30dae0205e
commit
3b64735058
@ -15,18 +15,18 @@ $('#query, #facet').on('input', function () {
|
|||||||
type: "POST",
|
type: "POST",
|
||||||
url: "query",
|
url: "query",
|
||||||
contentType: 'application/json',
|
contentType: 'application/json',
|
||||||
data: JSON.stringify({ 'query': query, 'facetCondition': facet }),
|
data: JSON.stringify({ 'query': query, 'facetCondition': facet, "facetDistribution": true }),
|
||||||
contentType: 'application/json',
|
contentType: 'application/json',
|
||||||
success: function (data, textStatus, request) {
|
success: function (data, textStatus, request) {
|
||||||
results.innerHTML = '';
|
results.innerHTML = '';
|
||||||
|
|
||||||
let timeSpent = request.getResponseHeader('Time-Ms');
|
let timeSpent = request.getResponseHeader('Time-Ms');
|
||||||
let numberOfDocuments = data.length;
|
let numberOfDocuments = data.documents.length;
|
||||||
count.innerHTML = `${numberOfDocuments}`;
|
count.innerHTML = `${numberOfDocuments}`;
|
||||||
time.innerHTML = `${timeSpent}ms`;
|
time.innerHTML = `${timeSpent}ms`;
|
||||||
time.classList.remove('fade-in-out');
|
time.classList.remove('fade-in-out');
|
||||||
|
|
||||||
for (element of data) {
|
for (element of data.documents) {
|
||||||
const elem = document.createElement('li');
|
const elem = document.createElement('li');
|
||||||
elem.classList.add("document");
|
elem.classList.add("document");
|
||||||
|
|
||||||
|
@ -626,6 +626,14 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
struct QueryBody {
|
struct QueryBody {
|
||||||
query: Option<String>,
|
query: Option<String>,
|
||||||
facet_condition: Option<String>,
|
facet_condition: Option<String>,
|
||||||
|
facet_distribution: Option<bool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
struct Answer {
|
||||||
|
documents: Vec<Map<String, Value>>,
|
||||||
|
facets: HashMap<String, Vec<Value>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
let disable_highlighting = opt.disable_highlighting;
|
let disable_highlighting = opt.disable_highlighting;
|
||||||
@ -649,7 +657,13 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let SearchResult { found_words, documents_ids } = search.execute().unwrap();
|
let SearchResult { found_words, candidates, documents_ids } = search.execute().unwrap();
|
||||||
|
|
||||||
|
let facets = if query.facet_distribution == Some(true) {
|
||||||
|
Some(index.facets(&rtxn).candidates(candidates).execute().unwrap())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
let mut documents = Vec::new();
|
let mut documents = Vec::new();
|
||||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
@ -674,10 +688,15 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
documents.push(object);
|
documents.push(object);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let answer = Answer {
|
||||||
|
documents,
|
||||||
|
facets: facets.unwrap_or_default(),
|
||||||
|
};
|
||||||
|
|
||||||
Response::builder()
|
Response::builder()
|
||||||
.header("Content-Type", "application/json")
|
.header("Content-Type", "application/json")
|
||||||
.header("Time-Ms", before_search.elapsed().as_millis().to_string())
|
.header("Time-Ms", before_search.elapsed().as_millis().to_string())
|
||||||
.body(serde_json::to_string(&documents).unwrap())
|
.body(serde_json::to_string(&answer).unwrap())
|
||||||
});
|
});
|
||||||
|
|
||||||
let index_cloned = index.clone();
|
let index_cloned = index.clone();
|
||||||
|
@ -9,7 +9,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::fields_ids_map::FieldsIdsMap;
|
use crate::fields_ids_map::FieldsIdsMap;
|
||||||
use crate::{default_criteria, Criterion, Search};
|
use crate::{default_criteria, Criterion, Search, FacetDistribution};
|
||||||
use crate::{BEU32, DocumentId, FieldId, ExternalDocumentsIds};
|
use crate::{BEU32, DocumentId, FieldId, ExternalDocumentsIds};
|
||||||
use crate::{
|
use crate::{
|
||||||
RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec, ObkvCodec,
|
RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec, ObkvCodec,
|
||||||
@ -351,6 +351,10 @@ impl Index {
|
|||||||
Ok(self.documents_ids(rtxn).map(|docids| docids.len() as usize)?)
|
Ok(self.documents_ids(rtxn).map(|docids| docids.len() as usize)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns a [`FacetDistribution`] builder bound to this index and to the
/// given read transaction.
///
/// This only constructs the builder via `FacetDistribution::new`; nothing is
/// read from the database here.
pub fn facets<'a>(&'a self, rtxn: &'a RoTxn) -> FacetDistribution<'a> {
|
||||||
|
FacetDistribution::new(rtxn, self)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn search<'a>(&'a self, rtxn: &'a RoTxn) -> Search<'a> {
|
pub fn search<'a>(&'a self, rtxn: &'a RoTxn) -> Search<'a> {
|
||||||
Search::new(rtxn, self)
|
Search::new(rtxn, self)
|
||||||
}
|
}
|
||||||
|
@ -28,7 +28,7 @@ pub use self::fields_ids_map::FieldsIdsMap;
|
|||||||
pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, ObkvCodec};
|
pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, ObkvCodec};
|
||||||
pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
||||||
pub use self::index::Index;
|
pub use self::index::Index;
|
||||||
pub use self::search::{Search, FacetCondition, SearchResult};
|
pub use self::search::{Search, FacetDistribution, FacetCondition, SearchResult};
|
||||||
pub use self::update_store::UpdateStore;
|
pub use self::update_store::UpdateStore;
|
||||||
|
|
||||||
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
||||||
|
106
src/search/facet/facet_distribution.rs
Normal file
106
src/search/facet/facet_distribution.rs
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
use std::collections::{HashSet, HashMap};
|
||||||
|
use std::fmt;
|
||||||
|
use std::ops::Bound::Unbounded;
|
||||||
|
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
use serde_json::Value;
|
||||||
|
|
||||||
|
use crate::facet::FacetType;
|
||||||
|
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec};
|
||||||
|
use crate::search::facet::FacetRange;
|
||||||
|
use crate::{Index, FieldId};
|
||||||
|
|
||||||
|
pub struct FacetDistribution<'a> {
|
||||||
|
facets: Option<HashSet<String>>,
|
||||||
|
candidates: Option<RoaringBitmap>,
|
||||||
|
rtxn: &'a heed::RoTxn<'a>,
|
||||||
|
index: &'a Index,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> FacetDistribution<'a> {
|
||||||
|
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> FacetDistribution<'a> {
|
||||||
|
FacetDistribution { facets: None, candidates: None, rtxn, index }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn candidates(&mut self, candidates: RoaringBitmap) -> &mut Self {
|
||||||
|
self.candidates = Some(candidates);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn facets<I: IntoIterator<Item=A>, A: AsRef<str>>(&mut self, names: I) -> &mut Self {
|
||||||
|
self.facets = Some(names.into_iter().map(|s| s.as_ref().to_string()).collect());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
fn facet_values(&self, field_id: FieldId, field_type: FacetType) -> heed::Result<Vec<Value>> {
|
||||||
|
let db = self.index.facet_field_id_value_docids;
|
||||||
|
let iter = match field_type {
|
||||||
|
FacetType::String => {
|
||||||
|
let iter = db
|
||||||
|
.prefix_iter(&self.rtxn, &[field_id])?
|
||||||
|
.remap_key_type::<FacetValueStringCodec>()
|
||||||
|
.map(|r| r.map(|((_, v), docids)| (Value::from(v), docids)));
|
||||||
|
Box::new(iter) as Box::<dyn Iterator<Item=_>>
|
||||||
|
},
|
||||||
|
FacetType::Integer => {
|
||||||
|
let db = db.remap_key_type::<FacetLevelValueI64Codec>();
|
||||||
|
let range = FacetRange::<i64, _>::new(
|
||||||
|
self.rtxn, db, field_id, 0, Unbounded, Unbounded,
|
||||||
|
)?;
|
||||||
|
Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (Value::from(v), docids))))
|
||||||
|
},
|
||||||
|
FacetType::Float => {
|
||||||
|
let db = db.remap_key_type::<FacetLevelValueF64Codec>();
|
||||||
|
let range = FacetRange::<f64, _>::new(
|
||||||
|
self.rtxn, db, field_id, 0, Unbounded, Unbounded,
|
||||||
|
)?;
|
||||||
|
Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (Value::from(v), docids))))
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut facet_values = Vec::new();
|
||||||
|
for result in iter {
|
||||||
|
let (value, docids) = result?;
|
||||||
|
match &self.candidates {
|
||||||
|
Some(candidates) => if !docids.is_disjoint(candidates) {
|
||||||
|
facet_values.push(value);
|
||||||
|
},
|
||||||
|
None => facet_values.push(value),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(facet_values)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn execute(&self) -> heed::Result<HashMap<String, Vec<Value>>> {
|
||||||
|
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
||||||
|
let faceted_fields = self.index.faceted_fields(self.rtxn)?;
|
||||||
|
let fields_ids: Vec<_> = match &self.facets {
|
||||||
|
Some(names) => {
|
||||||
|
names.iter().filter_map(|n| {
|
||||||
|
let id = fields_ids_map.id(n)?;
|
||||||
|
faceted_fields.get(&id).cloned().map(|t| (id, t))
|
||||||
|
}).collect()
|
||||||
|
},
|
||||||
|
None => faceted_fields.iter().map(|(id, t)| (*id, *t)).collect(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut facets_values = HashMap::new();
|
||||||
|
for (fid, ftype) in fields_ids {
|
||||||
|
let facet_name = fields_ids_map.name(fid).unwrap();
|
||||||
|
let values = self.facet_values(fid, ftype)?;
|
||||||
|
facets_values.insert(facet_name.to_string(), values);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(facets_values)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for FacetDistribution<'_> {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
let FacetDistribution { facets, candidates, rtxn: _, index: _ } = self;
|
||||||
|
f.debug_struct("FacetDistribution")
|
||||||
|
.field("facets", facets)
|
||||||
|
.field("candidates", candidates)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
@ -13,11 +13,13 @@ use crate::heed_codec::CboRoaringBitmapCodec;
|
|||||||
use crate::{Index, FieldId};
|
use crate::{Index, FieldId};
|
||||||
|
|
||||||
pub use self::facet_condition::{FacetCondition, FacetNumberOperator, FacetStringOperator};
|
pub use self::facet_condition::{FacetCondition, FacetNumberOperator, FacetStringOperator};
|
||||||
|
pub use self::facet_distribution::FacetDistribution;
|
||||||
|
|
||||||
mod facet_condition;
|
mod facet_condition;
|
||||||
|
mod facet_distribution;
|
||||||
mod parser;
|
mod parser;
|
||||||
|
|
||||||
struct FacetRange<'t, T: 't, KC> {
|
pub struct FacetRange<'t, T: 't, KC> {
|
||||||
iter: RoRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>,
|
iter: RoRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>,
|
||||||
end: Bound<T>,
|
end: Bound<T>,
|
||||||
}
|
}
|
||||||
@ -27,7 +29,7 @@ where
|
|||||||
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
||||||
T: PartialOrd + Copy + Bounded,
|
T: PartialOrd + Copy + Bounded,
|
||||||
{
|
{
|
||||||
fn new(
|
pub fn new(
|
||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t heed::RoTxn,
|
||||||
db: Database<KC, CboRoaringBitmapCodec>,
|
db: Database<KC, CboRoaringBitmapCodec>,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
@ -78,7 +80,7 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct FacetRevRange<'t, T: 't, KC> {
|
pub struct FacetRevRange<'t, T: 't, KC> {
|
||||||
iter: RoRevRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>,
|
iter: RoRevRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>,
|
||||||
end: Bound<T>,
|
end: Bound<T>,
|
||||||
}
|
}
|
||||||
@ -88,7 +90,7 @@ where
|
|||||||
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
||||||
T: PartialOrd + Copy + Bounded,
|
T: PartialOrd + Copy + Bounded,
|
||||||
{
|
{
|
||||||
fn new(
|
pub fn new(
|
||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t heed::RoTxn,
|
||||||
db: Database<KC, CboRoaringBitmapCodec>,
|
db: Database<KC, CboRoaringBitmapCodec>,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
|
@ -20,7 +20,7 @@ use crate::mdfs::Mdfs;
|
|||||||
use crate::query_tokens::{query_tokens, QueryToken};
|
use crate::query_tokens::{query_tokens, QueryToken};
|
||||||
use crate::{Index, FieldId, DocumentId, Criterion};
|
use crate::{Index, FieldId, DocumentId, Criterion};
|
||||||
|
|
||||||
pub use self::facet::{FacetCondition, FacetNumberOperator, FacetStringOperator};
|
pub use self::facet::{FacetCondition, FacetDistribution, FacetNumberOperator, FacetStringOperator};
|
||||||
pub use self::facet::{FacetIter};
|
pub use self::facet::{FacetIter};
|
||||||
|
|
||||||
// Building these factories is not free.
|
// Building these factories is not free.
|
||||||
@ -313,22 +313,26 @@ impl<'a> Search<'a> {
|
|||||||
// there is some facet conditions we return a placeholder.
|
// there is some facet conditions we return a placeholder.
|
||||||
let documents_ids = match order_by_facet {
|
let documents_ids = match order_by_facet {
|
||||||
Some((fid, ftype, is_ascending)) => {
|
Some((fid, ftype, is_ascending)) => {
|
||||||
self.facet_ordered(fid, ftype, is_ascending, facet_candidates, limit)?
|
self.facet_ordered(fid, ftype, is_ascending, facet_candidates.clone(), limit)?
|
||||||
},
|
},
|
||||||
None => facet_candidates.iter().take(limit).collect(),
|
None => facet_candidates.iter().take(limit).collect(),
|
||||||
};
|
};
|
||||||
return Ok(SearchResult { documents_ids, ..Default::default() })
|
return Ok(SearchResult {
|
||||||
|
documents_ids,
|
||||||
|
candidates: facet_candidates,
|
||||||
|
..Default::default()
|
||||||
|
})
|
||||||
},
|
},
|
||||||
(None, None) => {
|
(None, None) => {
|
||||||
// If the query is not set or results in no DFAs we return a placeholder.
|
// If the query is not set or results in no DFAs we return a placeholder.
|
||||||
let documents_ids = self.index.documents_ids(self.rtxn)?;
|
let all_docids = self.index.documents_ids(self.rtxn)?;
|
||||||
let documents_ids = match order_by_facet {
|
let documents_ids = match order_by_facet {
|
||||||
Some((fid, ftype, is_ascending)) => {
|
Some((fid, ftype, is_ascending)) => {
|
||||||
self.facet_ordered(fid, ftype, is_ascending, documents_ids, limit)?
|
self.facet_ordered(fid, ftype, is_ascending, all_docids.clone(), limit)?
|
||||||
},
|
},
|
||||||
None => documents_ids.iter().take(limit).collect(),
|
None => all_docids.iter().take(limit).collect(),
|
||||||
};
|
};
|
||||||
return Ok(SearchResult { documents_ids, ..Default::default() })
|
return Ok(SearchResult { documents_ids, candidates: all_docids,..Default::default() })
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -336,7 +340,7 @@ impl<'a> Search<'a> {
|
|||||||
|
|
||||||
// The mana depth first search is a revised DFS that explore
|
// The mana depth first search is a revised DFS that explore
|
||||||
// solutions in the order of their proximities.
|
// solutions in the order of their proximities.
|
||||||
let mut mdfs = Mdfs::new(self.index, self.rtxn, &derived_words, candidates);
|
let mut mdfs = Mdfs::new(self.index, self.rtxn, &derived_words, candidates.clone());
|
||||||
let mut documents = Vec::new();
|
let mut documents = Vec::new();
|
||||||
|
|
||||||
// We execute the Mdfs iterator until we find enough documents.
|
// We execute the Mdfs iterator until we find enough documents.
|
||||||
@ -364,7 +368,7 @@ impl<'a> Search<'a> {
|
|||||||
None => documents.into_iter().flatten().take(limit).collect(),
|
None => documents.into_iter().flatten().take(limit).collect(),
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(SearchResult { found_words, documents_ids })
|
Ok(SearchResult { found_words, candidates, documents_ids })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -383,6 +387,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct SearchResult {
|
pub struct SearchResult {
|
||||||
pub found_words: HashSet<String>,
|
pub found_words: HashSet<String>,
|
||||||
|
pub candidates: RoaringBitmap,
|
||||||
// TODO those documents ids should be associated with their criteria scores.
|
// TODO those documents ids should be associated with their criteria scores.
|
||||||
pub documents_ids: Vec<DocumentId>,
|
pub documents_ids: Vec<DocumentId>,
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user