Introduce a struct to compute facet values

This commit is contained in:
Kerollmops 2020-12-28 19:08:53 +01:00
parent 30dae0205e
commit 3b64735058
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
7 changed files with 156 additions and 20 deletions

View File

@ -15,18 +15,18 @@ $('#query, #facet').on('input', function () {
type: "POST", type: "POST",
url: "query", url: "query",
contentType: 'application/json', contentType: 'application/json',
data: JSON.stringify({ 'query': query, 'facetCondition': facet }), data: JSON.stringify({ 'query': query, 'facetCondition': facet, "facetDistribution": true }),
contentType: 'application/json', contentType: 'application/json',
success: function (data, textStatus, request) { success: function (data, textStatus, request) {
results.innerHTML = ''; results.innerHTML = '';
let timeSpent = request.getResponseHeader('Time-Ms'); let timeSpent = request.getResponseHeader('Time-Ms');
let numberOfDocuments = data.length; let numberOfDocuments = data.documents.length;
count.innerHTML = `${numberOfDocuments}`; count.innerHTML = `${numberOfDocuments}`;
time.innerHTML = `${timeSpent}ms`; time.innerHTML = `${timeSpent}ms`;
time.classList.remove('fade-in-out'); time.classList.remove('fade-in-out');
for (element of data) { for (element of data.documents) {
const elem = document.createElement('li'); const elem = document.createElement('li');
elem.classList.add("document"); elem.classList.add("document");

View File

@ -626,6 +626,14 @@ async fn main() -> anyhow::Result<()> {
struct QueryBody { struct QueryBody {
query: Option<String>, query: Option<String>,
facet_condition: Option<String>, facet_condition: Option<String>,
facet_distribution: Option<bool>,
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct Answer {
documents: Vec<Map<String, Value>>,
facets: HashMap<String, Vec<Value>>,
} }
let disable_highlighting = opt.disable_highlighting; let disable_highlighting = opt.disable_highlighting;
@ -649,7 +657,13 @@ async fn main() -> anyhow::Result<()> {
} }
} }
let SearchResult { found_words, documents_ids } = search.execute().unwrap(); let SearchResult { found_words, candidates, documents_ids } = search.execute().unwrap();
let facets = if query.facet_distribution == Some(true) {
Some(index.facets(&rtxn).candidates(candidates).execute().unwrap())
} else {
None
};
let mut documents = Vec::new(); let mut documents = Vec::new();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@ -674,10 +688,15 @@ async fn main() -> anyhow::Result<()> {
documents.push(object); documents.push(object);
} }
let answer = Answer {
documents,
facets: facets.unwrap_or_default(),
};
Response::builder() Response::builder()
.header("Content-Type", "application/json") .header("Content-Type", "application/json")
.header("Time-Ms", before_search.elapsed().as_millis().to_string()) .header("Time-Ms", before_search.elapsed().as_millis().to_string())
.body(serde_json::to_string(&documents).unwrap()) .body(serde_json::to_string(&answer).unwrap())
}); });
let index_cloned = index.clone(); let index_cloned = index.clone();

View File

@ -9,7 +9,7 @@ use roaring::RoaringBitmap;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::fields_ids_map::FieldsIdsMap; use crate::fields_ids_map::FieldsIdsMap;
use crate::{default_criteria, Criterion, Search}; use crate::{default_criteria, Criterion, Search, FacetDistribution};
use crate::{BEU32, DocumentId, FieldId, ExternalDocumentsIds}; use crate::{BEU32, DocumentId, FieldId, ExternalDocumentsIds};
use crate::{ use crate::{
RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec, ObkvCodec, RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec, ObkvCodec,
@ -351,6 +351,10 @@ impl Index {
Ok(self.documents_ids(rtxn).map(|docids| docids.len() as usize)?) Ok(self.documents_ids(rtxn).map(|docids| docids.len() as usize)?)
} }
pub fn facets<'a>(&'a self, rtxn: &'a RoTxn) -> FacetDistribution<'a> {
FacetDistribution::new(rtxn, self)
}
pub fn search<'a>(&'a self, rtxn: &'a RoTxn) -> Search<'a> { pub fn search<'a>(&'a self, rtxn: &'a RoTxn) -> Search<'a> {
Search::new(rtxn, self) Search::new(rtxn, self)
} }

View File

@ -28,7 +28,7 @@ pub use self::fields_ids_map::FieldsIdsMap;
pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, ObkvCodec}; pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, ObkvCodec};
pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec}; pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
pub use self::index::Index; pub use self::index::Index;
pub use self::search::{Search, FacetCondition, SearchResult}; pub use self::search::{Search, FacetDistribution, FacetCondition, SearchResult};
pub use self::update_store::UpdateStore; pub use self::update_store::UpdateStore;
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>; pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;

View File

@ -0,0 +1,106 @@
use std::collections::{HashSet, HashMap};
use std::fmt;
use std::ops::Bound::Unbounded;
use roaring::RoaringBitmap;
use serde_json::Value;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec};
use crate::search::facet::FacetRange;
use crate::{Index, FieldId};
/// Builder that collects, for the faceted fields of an index, the facet values
/// found in the facet database.
///
/// Configure it with [`FacetDistribution::facets`] and
/// [`FacetDistribution::candidates`], then call [`FacetDistribution::execute`].
pub struct FacetDistribution<'a> {
    /// Read transaction used for every database access.
    rtxn: &'a heed::RoTxn<'a>,
    /// Index whose facet database and fields map are read.
    index: &'a Index,
    /// Names of the facets to compute; `None` means every faceted field.
    facets: Option<HashSet<String>>,
    /// When set, only facet values whose documents ids intersect this set are
    /// kept; `None` keeps every value.
    candidates: Option<RoaringBitmap>,
}
impl<'a> FacetDistribution<'a> {
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> FacetDistribution<'a> {
FacetDistribution { facets: None, candidates: None, rtxn, index }
}
pub fn candidates(&mut self, candidates: RoaringBitmap) -> &mut Self {
self.candidates = Some(candidates);
self
}
pub fn facets<I: IntoIterator<Item=A>, A: AsRef<str>>(&mut self, names: I) -> &mut Self {
self.facets = Some(names.into_iter().map(|s| s.as_ref().to_string()).collect());
self
}
fn facet_values(&self, field_id: FieldId, field_type: FacetType) -> heed::Result<Vec<Value>> {
let db = self.index.facet_field_id_value_docids;
let iter = match field_type {
FacetType::String => {
let iter = db
.prefix_iter(&self.rtxn, &[field_id])?
.remap_key_type::<FacetValueStringCodec>()
.map(|r| r.map(|((_, v), docids)| (Value::from(v), docids)));
Box::new(iter) as Box::<dyn Iterator<Item=_>>
},
FacetType::Integer => {
let db = db.remap_key_type::<FacetLevelValueI64Codec>();
let range = FacetRange::<i64, _>::new(
self.rtxn, db, field_id, 0, Unbounded, Unbounded,
)?;
Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (Value::from(v), docids))))
},
FacetType::Float => {
let db = db.remap_key_type::<FacetLevelValueF64Codec>();
let range = FacetRange::<f64, _>::new(
self.rtxn, db, field_id, 0, Unbounded, Unbounded,
)?;
Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (Value::from(v), docids))))
},
};
let mut facet_values = Vec::new();
for result in iter {
let (value, docids) = result?;
match &self.candidates {
Some(candidates) => if !docids.is_disjoint(candidates) {
facet_values.push(value);
},
None => facet_values.push(value),
}
}
Ok(facet_values)
}
pub fn execute(&self) -> heed::Result<HashMap<String, Vec<Value>>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let faceted_fields = self.index.faceted_fields(self.rtxn)?;
let fields_ids: Vec<_> = match &self.facets {
Some(names) => {
names.iter().filter_map(|n| {
let id = fields_ids_map.id(n)?;
faceted_fields.get(&id).cloned().map(|t| (id, t))
}).collect()
},
None => faceted_fields.iter().map(|(id, t)| (*id, *t)).collect(),
};
let mut facets_values = HashMap::new();
for (fid, ftype) in fields_ids {
let facet_name = fields_ids_map.name(fid).unwrap();
let values = self.facet_values(fid, ftype)?;
facets_values.insert(facet_name.to_string(), values);
}
Ok(facets_values)
}
}
impl fmt::Debug for FacetDistribution<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let FacetDistribution { facets, candidates, rtxn: _, index: _ } = self;
f.debug_struct("FacetDistribution")
.field("facets", facets)
.field("candidates", candidates)
.finish()
}
}

View File

@ -13,11 +13,13 @@ use crate::heed_codec::CboRoaringBitmapCodec;
use crate::{Index, FieldId}; use crate::{Index, FieldId};
pub use self::facet_condition::{FacetCondition, FacetNumberOperator, FacetStringOperator}; pub use self::facet_condition::{FacetCondition, FacetNumberOperator, FacetStringOperator};
pub use self::facet_distribution::FacetDistribution;
mod facet_condition; mod facet_condition;
mod facet_distribution;
mod parser; mod parser;
struct FacetRange<'t, T: 't, KC> { pub struct FacetRange<'t, T: 't, KC> {
iter: RoRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>, iter: RoRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>,
end: Bound<T>, end: Bound<T>,
} }
@ -27,7 +29,7 @@ where
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
T: PartialOrd + Copy + Bounded, T: PartialOrd + Copy + Bounded,
{ {
fn new( pub fn new(
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
db: Database<KC, CboRoaringBitmapCodec>, db: Database<KC, CboRoaringBitmapCodec>,
field_id: FieldId, field_id: FieldId,
@ -78,7 +80,7 @@ where
} }
} }
struct FacetRevRange<'t, T: 't, KC> { pub struct FacetRevRange<'t, T: 't, KC> {
iter: RoRevRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>, iter: RoRevRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>,
end: Bound<T>, end: Bound<T>,
} }
@ -88,7 +90,7 @@ where
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
T: PartialOrd + Copy + Bounded, T: PartialOrd + Copy + Bounded,
{ {
fn new( pub fn new(
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
db: Database<KC, CboRoaringBitmapCodec>, db: Database<KC, CboRoaringBitmapCodec>,
field_id: FieldId, field_id: FieldId,

View File

@ -20,7 +20,7 @@ use crate::mdfs::Mdfs;
use crate::query_tokens::{query_tokens, QueryToken}; use crate::query_tokens::{query_tokens, QueryToken};
use crate::{Index, FieldId, DocumentId, Criterion}; use crate::{Index, FieldId, DocumentId, Criterion};
pub use self::facet::{FacetCondition, FacetNumberOperator, FacetStringOperator}; pub use self::facet::{FacetCondition, FacetDistribution, FacetNumberOperator, FacetStringOperator};
pub use self::facet::{FacetIter}; pub use self::facet::{FacetIter};
// Building these factories is not free. // Building these factories is not free.
@ -313,22 +313,26 @@ impl<'a> Search<'a> {
// there is some facet conditions we return a placeholder. // there is some facet conditions we return a placeholder.
let documents_ids = match order_by_facet { let documents_ids = match order_by_facet {
Some((fid, ftype, is_ascending)) => { Some((fid, ftype, is_ascending)) => {
self.facet_ordered(fid, ftype, is_ascending, facet_candidates, limit)? self.facet_ordered(fid, ftype, is_ascending, facet_candidates.clone(), limit)?
}, },
None => facet_candidates.iter().take(limit).collect(), None => facet_candidates.iter().take(limit).collect(),
}; };
return Ok(SearchResult { documents_ids, ..Default::default() }) return Ok(SearchResult {
documents_ids,
candidates: facet_candidates,
..Default::default()
})
}, },
(None, None) => { (None, None) => {
// If the query is not set or results in no DFAs we return a placeholder. // If the query is not set or results in no DFAs we return a placeholder.
let documents_ids = self.index.documents_ids(self.rtxn)?; let all_docids = self.index.documents_ids(self.rtxn)?;
let documents_ids = match order_by_facet { let documents_ids = match order_by_facet {
Some((fid, ftype, is_ascending)) => { Some((fid, ftype, is_ascending)) => {
self.facet_ordered(fid, ftype, is_ascending, documents_ids, limit)? self.facet_ordered(fid, ftype, is_ascending, all_docids.clone(), limit)?
}, },
None => documents_ids.iter().take(limit).collect(), None => all_docids.iter().take(limit).collect(),
}; };
return Ok(SearchResult { documents_ids, ..Default::default() }) return Ok(SearchResult { documents_ids, candidates: all_docids,..Default::default() })
}, },
}; };
@ -336,7 +340,7 @@ impl<'a> Search<'a> {
// The mana depth first search is a revised DFS that explore // The mana depth first search is a revised DFS that explore
// solutions in the order of their proximities. // solutions in the order of their proximities.
let mut mdfs = Mdfs::new(self.index, self.rtxn, &derived_words, candidates); let mut mdfs = Mdfs::new(self.index, self.rtxn, &derived_words, candidates.clone());
let mut documents = Vec::new(); let mut documents = Vec::new();
// We execute the Mdfs iterator until we find enough documents. // We execute the Mdfs iterator until we find enough documents.
@ -364,7 +368,7 @@ impl<'a> Search<'a> {
None => documents.into_iter().flatten().take(limit).collect(), None => documents.into_iter().flatten().take(limit).collect(),
}; };
Ok(SearchResult { found_words, documents_ids }) Ok(SearchResult { found_words, candidates, documents_ids })
} }
} }
@ -383,6 +387,7 @@ impl fmt::Debug for Search<'_> {
#[derive(Default)] #[derive(Default)]
pub struct SearchResult { pub struct SearchResult {
pub found_words: HashSet<String>, pub found_words: HashSet<String>,
pub candidates: RoaringBitmap,
// TODO those documents ids should be associated with their criteria scores. // TODO those documents ids should be associated with their criteria scores.
pub documents_ids: Vec<DocumentId>, pub documents_ids: Vec<DocumentId>,
} }