mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-22 11:17:28 +01:00
Introduce a struct to compute facets values
This commit is contained in:
parent
30dae0205e
commit
3b64735058
@ -15,18 +15,18 @@ $('#query, #facet').on('input', function () {
|
||||
type: "POST",
|
||||
url: "query",
|
||||
contentType: 'application/json',
|
||||
data: JSON.stringify({ 'query': query, 'facetCondition': facet }),
|
||||
data: JSON.stringify({ 'query': query, 'facetCondition': facet, "facetDistribution": true }),
|
||||
contentType: 'application/json',
|
||||
success: function (data, textStatus, request) {
|
||||
results.innerHTML = '';
|
||||
|
||||
let timeSpent = request.getResponseHeader('Time-Ms');
|
||||
let numberOfDocuments = data.length;
|
||||
let numberOfDocuments = data.documents.length;
|
||||
count.innerHTML = `${numberOfDocuments}`;
|
||||
time.innerHTML = `${timeSpent}ms`;
|
||||
time.classList.remove('fade-in-out');
|
||||
|
||||
for (element of data) {
|
||||
for (element of data.documents) {
|
||||
const elem = document.createElement('li');
|
||||
elem.classList.add("document");
|
||||
|
||||
|
@ -626,6 +626,14 @@ async fn main() -> anyhow::Result<()> {
|
||||
struct QueryBody {
|
||||
query: Option<String>,
|
||||
facet_condition: Option<String>,
|
||||
facet_distribution: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct Answer {
|
||||
documents: Vec<Map<String, Value>>,
|
||||
facets: HashMap<String, Vec<Value>>,
|
||||
}
|
||||
|
||||
let disable_highlighting = opt.disable_highlighting;
|
||||
@ -649,7 +657,13 @@ async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
let SearchResult { found_words, documents_ids } = search.execute().unwrap();
|
||||
let SearchResult { found_words, candidates, documents_ids } = search.execute().unwrap();
|
||||
|
||||
let facets = if query.facet_distribution == Some(true) {
|
||||
Some(index.facets(&rtxn).candidates(candidates).execute().unwrap())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let mut documents = Vec::new();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
@ -674,10 +688,15 @@ async fn main() -> anyhow::Result<()> {
|
||||
documents.push(object);
|
||||
}
|
||||
|
||||
let answer = Answer {
|
||||
documents,
|
||||
facets: facets.unwrap_or_default(),
|
||||
};
|
||||
|
||||
Response::builder()
|
||||
.header("Content-Type", "application/json")
|
||||
.header("Time-Ms", before_search.elapsed().as_millis().to_string())
|
||||
.body(serde_json::to_string(&documents).unwrap())
|
||||
.body(serde_json::to_string(&answer).unwrap())
|
||||
});
|
||||
|
||||
let index_cloned = index.clone();
|
||||
|
@ -9,7 +9,7 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use crate::facet::FacetType;
|
||||
use crate::fields_ids_map::FieldsIdsMap;
|
||||
use crate::{default_criteria, Criterion, Search};
|
||||
use crate::{default_criteria, Criterion, Search, FacetDistribution};
|
||||
use crate::{BEU32, DocumentId, FieldId, ExternalDocumentsIds};
|
||||
use crate::{
|
||||
RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec, ObkvCodec,
|
||||
@ -351,6 +351,10 @@ impl Index {
|
||||
Ok(self.documents_ids(rtxn).map(|docids| docids.len() as usize)?)
|
||||
}
|
||||
|
||||
pub fn facets<'a>(&'a self, rtxn: &'a RoTxn) -> FacetDistribution<'a> {
|
||||
FacetDistribution::new(rtxn, self)
|
||||
}
|
||||
|
||||
pub fn search<'a>(&'a self, rtxn: &'a RoTxn) -> Search<'a> {
|
||||
Search::new(rtxn, self)
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ pub use self::fields_ids_map::FieldsIdsMap;
|
||||
pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, ObkvCodec};
|
||||
pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
||||
pub use self::index::Index;
|
||||
pub use self::search::{Search, FacetCondition, SearchResult};
|
||||
pub use self::search::{Search, FacetDistribution, FacetCondition, SearchResult};
|
||||
pub use self::update_store::UpdateStore;
|
||||
|
||||
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
||||
|
106
src/search/facet/facet_distribution.rs
Normal file
106
src/search/facet/facet_distribution.rs
Normal file
@ -0,0 +1,106 @@
|
||||
use std::collections::{HashSet, HashMap};
|
||||
use std::fmt;
|
||||
use std::ops::Bound::Unbounded;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec};
|
||||
use crate::search::facet::FacetRange;
|
||||
use crate::{Index, FieldId};
|
||||
|
||||
pub struct FacetDistribution<'a> {
|
||||
facets: Option<HashSet<String>>,
|
||||
candidates: Option<RoaringBitmap>,
|
||||
rtxn: &'a heed::RoTxn<'a>,
|
||||
index: &'a Index,
|
||||
}
|
||||
|
||||
impl<'a> FacetDistribution<'a> {
|
||||
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> FacetDistribution<'a> {
|
||||
FacetDistribution { facets: None, candidates: None, rtxn, index }
|
||||
}
|
||||
|
||||
pub fn candidates(&mut self, candidates: RoaringBitmap) -> &mut Self {
|
||||
self.candidates = Some(candidates);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn facets<I: IntoIterator<Item=A>, A: AsRef<str>>(&mut self, names: I) -> &mut Self {
|
||||
self.facets = Some(names.into_iter().map(|s| s.as_ref().to_string()).collect());
|
||||
self
|
||||
}
|
||||
|
||||
fn facet_values(&self, field_id: FieldId, field_type: FacetType) -> heed::Result<Vec<Value>> {
|
||||
let db = self.index.facet_field_id_value_docids;
|
||||
let iter = match field_type {
|
||||
FacetType::String => {
|
||||
let iter = db
|
||||
.prefix_iter(&self.rtxn, &[field_id])?
|
||||
.remap_key_type::<FacetValueStringCodec>()
|
||||
.map(|r| r.map(|((_, v), docids)| (Value::from(v), docids)));
|
||||
Box::new(iter) as Box::<dyn Iterator<Item=_>>
|
||||
},
|
||||
FacetType::Integer => {
|
||||
let db = db.remap_key_type::<FacetLevelValueI64Codec>();
|
||||
let range = FacetRange::<i64, _>::new(
|
||||
self.rtxn, db, field_id, 0, Unbounded, Unbounded,
|
||||
)?;
|
||||
Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (Value::from(v), docids))))
|
||||
},
|
||||
FacetType::Float => {
|
||||
let db = db.remap_key_type::<FacetLevelValueF64Codec>();
|
||||
let range = FacetRange::<f64, _>::new(
|
||||
self.rtxn, db, field_id, 0, Unbounded, Unbounded,
|
||||
)?;
|
||||
Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (Value::from(v), docids))))
|
||||
},
|
||||
};
|
||||
|
||||
let mut facet_values = Vec::new();
|
||||
for result in iter {
|
||||
let (value, docids) = result?;
|
||||
match &self.candidates {
|
||||
Some(candidates) => if !docids.is_disjoint(candidates) {
|
||||
facet_values.push(value);
|
||||
},
|
||||
None => facet_values.push(value),
|
||||
}
|
||||
}
|
||||
Ok(facet_values)
|
||||
}
|
||||
|
||||
pub fn execute(&self) -> heed::Result<HashMap<String, Vec<Value>>> {
|
||||
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
||||
let faceted_fields = self.index.faceted_fields(self.rtxn)?;
|
||||
let fields_ids: Vec<_> = match &self.facets {
|
||||
Some(names) => {
|
||||
names.iter().filter_map(|n| {
|
||||
let id = fields_ids_map.id(n)?;
|
||||
faceted_fields.get(&id).cloned().map(|t| (id, t))
|
||||
}).collect()
|
||||
},
|
||||
None => faceted_fields.iter().map(|(id, t)| (*id, *t)).collect(),
|
||||
};
|
||||
|
||||
let mut facets_values = HashMap::new();
|
||||
for (fid, ftype) in fields_ids {
|
||||
let facet_name = fields_ids_map.name(fid).unwrap();
|
||||
let values = self.facet_values(fid, ftype)?;
|
||||
facets_values.insert(facet_name.to_string(), values);
|
||||
}
|
||||
|
||||
Ok(facets_values)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for FacetDistribution<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let FacetDistribution { facets, candidates, rtxn: _, index: _ } = self;
|
||||
f.debug_struct("FacetDistribution")
|
||||
.field("facets", facets)
|
||||
.field("candidates", candidates)
|
||||
.finish()
|
||||
}
|
||||
}
|
@ -13,11 +13,13 @@ use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::{Index, FieldId};
|
||||
|
||||
pub use self::facet_condition::{FacetCondition, FacetNumberOperator, FacetStringOperator};
|
||||
pub use self::facet_distribution::FacetDistribution;
|
||||
|
||||
mod facet_condition;
|
||||
mod facet_distribution;
|
||||
mod parser;
|
||||
|
||||
struct FacetRange<'t, T: 't, KC> {
|
||||
pub struct FacetRange<'t, T: 't, KC> {
|
||||
iter: RoRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>,
|
||||
end: Bound<T>,
|
||||
}
|
||||
@ -27,7 +29,7 @@ where
|
||||
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
||||
T: PartialOrd + Copy + Bounded,
|
||||
{
|
||||
fn new(
|
||||
pub fn new(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
db: Database<KC, CboRoaringBitmapCodec>,
|
||||
field_id: FieldId,
|
||||
@ -78,7 +80,7 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
struct FacetRevRange<'t, T: 't, KC> {
|
||||
pub struct FacetRevRange<'t, T: 't, KC> {
|
||||
iter: RoRevRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>,
|
||||
end: Bound<T>,
|
||||
}
|
||||
@ -88,7 +90,7 @@ where
|
||||
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
||||
T: PartialOrd + Copy + Bounded,
|
||||
{
|
||||
fn new(
|
||||
pub fn new(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
db: Database<KC, CboRoaringBitmapCodec>,
|
||||
field_id: FieldId,
|
||||
|
@ -20,7 +20,7 @@ use crate::mdfs::Mdfs;
|
||||
use crate::query_tokens::{query_tokens, QueryToken};
|
||||
use crate::{Index, FieldId, DocumentId, Criterion};
|
||||
|
||||
pub use self::facet::{FacetCondition, FacetNumberOperator, FacetStringOperator};
|
||||
pub use self::facet::{FacetCondition, FacetDistribution, FacetNumberOperator, FacetStringOperator};
|
||||
pub use self::facet::{FacetIter};
|
||||
|
||||
// Building these factories is not free.
|
||||
@ -313,22 +313,26 @@ impl<'a> Search<'a> {
|
||||
// there is some facet conditions we return a placeholder.
|
||||
let documents_ids = match order_by_facet {
|
||||
Some((fid, ftype, is_ascending)) => {
|
||||
self.facet_ordered(fid, ftype, is_ascending, facet_candidates, limit)?
|
||||
self.facet_ordered(fid, ftype, is_ascending, facet_candidates.clone(), limit)?
|
||||
},
|
||||
None => facet_candidates.iter().take(limit).collect(),
|
||||
};
|
||||
return Ok(SearchResult { documents_ids, ..Default::default() })
|
||||
return Ok(SearchResult {
|
||||
documents_ids,
|
||||
candidates: facet_candidates,
|
||||
..Default::default()
|
||||
})
|
||||
},
|
||||
(None, None) => {
|
||||
// If the query is not set or results in no DFAs we return a placeholder.
|
||||
let documents_ids = self.index.documents_ids(self.rtxn)?;
|
||||
let all_docids = self.index.documents_ids(self.rtxn)?;
|
||||
let documents_ids = match order_by_facet {
|
||||
Some((fid, ftype, is_ascending)) => {
|
||||
self.facet_ordered(fid, ftype, is_ascending, documents_ids, limit)?
|
||||
self.facet_ordered(fid, ftype, is_ascending, all_docids.clone(), limit)?
|
||||
},
|
||||
None => documents_ids.iter().take(limit).collect(),
|
||||
None => all_docids.iter().take(limit).collect(),
|
||||
};
|
||||
return Ok(SearchResult { documents_ids, ..Default::default() })
|
||||
return Ok(SearchResult { documents_ids, candidates: all_docids,..Default::default() })
|
||||
},
|
||||
};
|
||||
|
||||
@ -336,7 +340,7 @@ impl<'a> Search<'a> {
|
||||
|
||||
// The mana depth first search is a revised DFS that explore
|
||||
// solutions in the order of their proximities.
|
||||
let mut mdfs = Mdfs::new(self.index, self.rtxn, &derived_words, candidates);
|
||||
let mut mdfs = Mdfs::new(self.index, self.rtxn, &derived_words, candidates.clone());
|
||||
let mut documents = Vec::new();
|
||||
|
||||
// We execute the Mdfs iterator until we find enough documents.
|
||||
@ -364,7 +368,7 @@ impl<'a> Search<'a> {
|
||||
None => documents.into_iter().flatten().take(limit).collect(),
|
||||
};
|
||||
|
||||
Ok(SearchResult { found_words, documents_ids })
|
||||
Ok(SearchResult { found_words, candidates, documents_ids })
|
||||
}
|
||||
}
|
||||
|
||||
@ -383,6 +387,7 @@ impl fmt::Debug for Search<'_> {
|
||||
#[derive(Default)]
|
||||
pub struct SearchResult {
|
||||
pub found_words: HashSet<String>,
|
||||
pub candidates: RoaringBitmap,
|
||||
// TODO those documents ids should be associated with their criteria scores.
|
||||
pub documents_ids: Vec<DocumentId>,
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user