mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-25 14:10:06 +01:00
Merge pull request #66 from meilisearch/show-available-facets
Expose an API to compute facets distribution
This commit is contained in:
commit
fa0cc2dc13
4
Cargo.lock
generated
4
Cargo.lock
generated
@ -1211,9 +1211,9 @@ checksum = "21215c1b9d8f7832b433255bd9eea3e2779aa55b21b2f8e13aad62c74749b237"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "roaring"
|
name = "roaring"
|
||||||
version = "0.6.3"
|
version = "0.6.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f12bdbc3b9b2fd12148ee9f97f9e36438f1e84d3ce47fec0ad6b4bfbb62b3a35"
|
checksum = "4d60b41c8f25d07cecab125cb46ebbf234fc055effc61ca2392a3ef4f9422304"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"byteorder",
|
"byteorder",
|
||||||
]
|
]
|
||||||
|
@ -31,7 +31,7 @@ ordered-float = "2.0.0"
|
|||||||
rayon = "1.3.1"
|
rayon = "1.3.1"
|
||||||
regex = "1.4.2"
|
regex = "1.4.2"
|
||||||
ringtail = "0.3.0"
|
ringtail = "0.3.0"
|
||||||
roaring = "0.6.1"
|
roaring = "0.6.4"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_json = { version = "1.0.59", features = ["preserve_order"] }
|
serde_json = { version = "1.0.59", features = ["preserve_order"] }
|
||||||
slice-group-by = "0.2.6"
|
slice-group-by = "0.2.6"
|
||||||
|
627
http-ui/Cargo.lock
generated
627
http-ui/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -22,6 +22,7 @@ tempfile = "3.1.0"
|
|||||||
askama = "0.10.1"
|
askama = "0.10.1"
|
||||||
askama_warp = "0.10.0"
|
askama_warp = "0.10.0"
|
||||||
bytes = "0.5.6"
|
bytes = "0.5.6"
|
||||||
|
either = "1.6.1"
|
||||||
flate2 = "1.0.19"
|
flate2 = "1.0.19"
|
||||||
futures = "0.3.6"
|
futures = "0.3.6"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
var request = null;
|
var request = null;
|
||||||
var timeoutID = null;
|
var timeoutID = null;
|
||||||
|
|
||||||
$('#query, #facet').on('input', function () {
|
$('#query, #filters').on('input', function () {
|
||||||
var query = $('#query').val();
|
var query = $('#query').val();
|
||||||
var facet = $('#facet').val();
|
var filters = $('#filters').val();
|
||||||
var timeoutMs = 100;
|
var timeoutMs = 100;
|
||||||
|
|
||||||
if (timeoutID !== null) {
|
if (timeoutID !== null) {
|
||||||
@ -15,18 +15,35 @@ $('#query, #facet').on('input', function () {
|
|||||||
type: "POST",
|
type: "POST",
|
||||||
url: "query",
|
url: "query",
|
||||||
contentType: 'application/json',
|
contentType: 'application/json',
|
||||||
data: JSON.stringify({ 'query': query, 'facetCondition': facet }),
|
data: JSON.stringify({
|
||||||
|
'query': query,
|
||||||
|
'filters': filters,
|
||||||
|
"facetDistribution": true,
|
||||||
|
}),
|
||||||
contentType: 'application/json',
|
contentType: 'application/json',
|
||||||
success: function (data, textStatus, request) {
|
success: function (data, textStatus, request) {
|
||||||
results.innerHTML = '';
|
results.innerHTML = '';
|
||||||
|
facets.innerHTML = '';
|
||||||
|
|
||||||
let timeSpent = request.getResponseHeader('Time-Ms');
|
let timeSpent = request.getResponseHeader('Time-Ms');
|
||||||
let numberOfDocuments = data.length;
|
let numberOfDocuments = data.documents.length;
|
||||||
count.innerHTML = `${numberOfDocuments}`;
|
count.innerHTML = data.numberOfCandidates.toLocaleString();
|
||||||
time.innerHTML = `${timeSpent}ms`;
|
time.innerHTML = `${timeSpent}ms`;
|
||||||
time.classList.remove('fade-in-out');
|
time.classList.remove('fade-in-out');
|
||||||
|
|
||||||
for (element of data) {
|
for (facet_name in data.facets) {
|
||||||
|
for (value in data.facets[facet_name]) {
|
||||||
|
const elem = document.createElement('span');
|
||||||
|
const count = data.facets[facet_name][value];
|
||||||
|
elem.classList.add("tag");
|
||||||
|
elem.setAttribute('data-name', facet_name);
|
||||||
|
elem.setAttribute('data-value', value);
|
||||||
|
elem.innerHTML = `${facet_name}:${value} (${count})`;
|
||||||
|
facets.appendChild(elem);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (element of data.documents) {
|
||||||
const elem = document.createElement('li');
|
const elem = document.createElement('li');
|
||||||
elem.classList.add("document");
|
elem.classList.add("document");
|
||||||
|
|
||||||
@ -54,6 +71,19 @@ $('#query, #facet').on('input', function () {
|
|||||||
results.appendChild(elem);
|
results.appendChild(elem);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// When we click on a tag we append the facet value
|
||||||
|
// at the end of the facet query.
|
||||||
|
$('#facets .tag').on('click', function () {
|
||||||
|
let name = $(this).attr("data-name");
|
||||||
|
let value = $(this).attr("data-value");
|
||||||
|
|
||||||
|
let facet_query = $('#filters').val().trim();
|
||||||
|
if (facet_query === "") {
|
||||||
|
$('#filters').val(`${name} = "${value}"`).trigger('input');
|
||||||
|
} else {
|
||||||
|
$('#filters').val(`${facet_query} AND ${name} = "${value}"`).trigger('input');
|
||||||
|
}
|
||||||
|
});
|
||||||
},
|
},
|
||||||
beforeSend: function () {
|
beforeSend: function () {
|
||||||
if (request !== null) {
|
if (request !== null) {
|
||||||
@ -65,6 +95,25 @@ $('#query, #facet').on('input', function () {
|
|||||||
}, timeoutMs);
|
}, timeoutMs);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
function diffArray(arr1, arr2) {
|
||||||
|
return arr1.concat(arr2).filter(function (val) {
|
||||||
|
if (!(arr1.includes(val) && arr2.includes(val)))
|
||||||
|
return val;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function selectedFacetsToArray(facets_obj) {
|
||||||
|
var array = [];
|
||||||
|
for (const facet_name in facets_obj) {
|
||||||
|
var subarray = [];
|
||||||
|
for (const facet_value of facets_obj[facet_name]) {
|
||||||
|
subarray.push(`${facet_name}:${facet_value}`);
|
||||||
|
}
|
||||||
|
array.push(subarray);
|
||||||
|
}
|
||||||
|
return array;
|
||||||
|
}
|
||||||
|
|
||||||
// Make the number of documents a little bit prettier
|
// Make the number of documents a little bit prettier
|
||||||
$('#docs-count').text(function(index, text) {
|
$('#docs-count').text(function(index, text) {
|
||||||
return parseInt(text).toLocaleString()
|
return parseInt(text).toLocaleString()
|
||||||
@ -75,8 +124,8 @@ $('#db-size').text(function(index, text) {
|
|||||||
return filesize(parseInt(text))
|
return filesize(parseInt(text))
|
||||||
});
|
});
|
||||||
|
|
||||||
// We trigger the input when we load the script, this way
|
// We trigger the input when we load the script.
|
||||||
// we execute a placeholder search when the input is empty.
|
|
||||||
$(window).on('load', function () {
|
$(window).on('load', function () {
|
||||||
|
// We execute a placeholder search when the input is empty.
|
||||||
$('#query').trigger('input');
|
$('#query').trigger('input');
|
||||||
});
|
});
|
||||||
|
@ -4,6 +4,23 @@
|
|||||||
padding: 0;
|
padding: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#facets .tag {
|
||||||
|
margin-right: 1em;
|
||||||
|
margin-bottom: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
#facets {
|
||||||
|
max-width: 900px;
|
||||||
|
margin: 20px auto 0 auto;
|
||||||
|
padding: 0;
|
||||||
|
max-height: 16em;
|
||||||
|
overflow: scroll;
|
||||||
|
}
|
||||||
|
|
||||||
|
#facets .tag:hover {
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
#logo-white {
|
#logo-white {
|
||||||
display: none;
|
display: none;
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||||
use std::fmt::Display;
|
use std::fmt::Display;
|
||||||
use std::fs::{File, create_dir_all};
|
use std::fs::{File, create_dir_all};
|
||||||
use std::net::SocketAddr;
|
use std::net::SocketAddr;
|
||||||
@ -11,6 +11,7 @@ use std::{mem, io};
|
|||||||
|
|
||||||
use askama_warp::Template;
|
use askama_warp::Template;
|
||||||
use byte_unit::Byte;
|
use byte_unit::Byte;
|
||||||
|
use either::Either;
|
||||||
use flate2::read::GzDecoder;
|
use flate2::read::GzDecoder;
|
||||||
use futures::stream;
|
use futures::stream;
|
||||||
use futures::{FutureExt, StreamExt};
|
use futures::{FutureExt, StreamExt};
|
||||||
@ -28,6 +29,7 @@ use warp::filters::ws::Message;
|
|||||||
use warp::{Filter, http::Response};
|
use warp::{Filter, http::Response};
|
||||||
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
|
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
|
||||||
|
|
||||||
|
use milli::facet::FacetValue;
|
||||||
use milli::update::UpdateIndexingStep::*;
|
use milli::update::UpdateIndexingStep::*;
|
||||||
use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat};
|
use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat};
|
||||||
use milli::{obkv_to_json, Index, UpdateStore, SearchResult, FacetCondition};
|
use milli::{obkv_to_json, Index, UpdateStore, SearchResult, FacetCondition};
|
||||||
@ -620,12 +622,38 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
.body(include_str!("../public/logo-black.svg"))
|
.body(include_str!("../public/logo-black.svg"))
|
||||||
);
|
);
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
#[serde(untagged)]
|
||||||
|
enum UntaggedEither<L, R> {
|
||||||
|
Left(L),
|
||||||
|
Right(R),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<L, R> From<UntaggedEither<L, R>> for Either<L, R> {
|
||||||
|
fn from(value: UntaggedEither<L, R>) -> Either<L, R> {
|
||||||
|
match value {
|
||||||
|
UntaggedEither::Left(left) => Either::Left(left),
|
||||||
|
UntaggedEither::Right(right) => Either::Right(right),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
#[serde(deny_unknown_fields)]
|
#[serde(deny_unknown_fields)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
struct QueryBody {
|
struct QueryBody {
|
||||||
query: Option<String>,
|
query: Option<String>,
|
||||||
facet_condition: Option<String>,
|
filters: Option<String>,
|
||||||
|
facet_filters: Option<Vec<UntaggedEither<Vec<String>, String>>>,
|
||||||
|
facet_distribution: Option<bool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
struct Answer {
|
||||||
|
documents: Vec<Map<String, Value>>,
|
||||||
|
number_of_candidates: u64,
|
||||||
|
facets: BTreeMap<String, BTreeMap<FacetValue, u64>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
let disable_highlighting = opt.disable_highlighting;
|
let disable_highlighting = opt.disable_highlighting;
|
||||||
@ -642,14 +670,42 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
if let Some(query) = query.query {
|
if let Some(query) = query.query {
|
||||||
search.query(query);
|
search.query(query);
|
||||||
}
|
}
|
||||||
if let Some(condition) = query.facet_condition {
|
|
||||||
if !condition.trim().is_empty() {
|
let filters = match query.filters {
|
||||||
let condition = FacetCondition::from_str(&rtxn, &index, &condition).unwrap();
|
Some(condition) if !condition.trim().is_empty() => {
|
||||||
search.facet_condition(condition);
|
Some(FacetCondition::from_str(&rtxn, &index, &condition).unwrap())
|
||||||
}
|
},
|
||||||
|
_otherwise => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let facet_filters = match query.facet_filters {
|
||||||
|
Some(array) => {
|
||||||
|
let eithers = array.into_iter().map(Into::into);
|
||||||
|
FacetCondition::from_array(&rtxn, &index, eithers).unwrap()
|
||||||
|
},
|
||||||
|
_otherwise => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let condition = match (filters, facet_filters) {
|
||||||
|
(Some(filters), Some(facet_filters)) => {
|
||||||
|
Some(FacetCondition::And(Box::new(filters), Box::new(facet_filters)))
|
||||||
|
},
|
||||||
|
(Some(condition), None) | (None, Some(condition)) => Some(condition),
|
||||||
|
_otherwise => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(condition) = condition {
|
||||||
|
search.facet_condition(condition);
|
||||||
}
|
}
|
||||||
|
|
||||||
let SearchResult { found_words, documents_ids } = search.execute().unwrap();
|
let SearchResult { found_words, candidates, documents_ids } = search.execute().unwrap();
|
||||||
|
|
||||||
|
let number_of_candidates = candidates.len();
|
||||||
|
let facets = if query.facet_distribution == Some(true) {
|
||||||
|
Some(index.facets_distribution(&rtxn).candidates(candidates).execute().unwrap())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
let mut documents = Vec::new();
|
let mut documents = Vec::new();
|
||||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
@ -674,10 +730,16 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
documents.push(object);
|
documents.push(object);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let answer = Answer {
|
||||||
|
documents,
|
||||||
|
number_of_candidates,
|
||||||
|
facets: facets.unwrap_or_default(),
|
||||||
|
};
|
||||||
|
|
||||||
Response::builder()
|
Response::builder()
|
||||||
.header("Content-Type", "application/json")
|
.header("Content-Type", "application/json")
|
||||||
.header("Time-Ms", before_search.elapsed().as_millis().to_string())
|
.header("Time-Ms", before_search.elapsed().as_millis().to_string())
|
||||||
.body(serde_json::to_string(&documents).unwrap())
|
.body(serde_json::to_string(&answer).unwrap())
|
||||||
});
|
});
|
||||||
|
|
||||||
let index_cloned = index.clone();
|
let index_cloned = index.clone();
|
||||||
|
@ -56,7 +56,7 @@
|
|||||||
<div class="level-item">
|
<div class="level-item">
|
||||||
<div class="field has-addons has-addons-right">
|
<div class="field has-addons has-addons-right">
|
||||||
<input id="query" class="input" type="text" autofocus placeholder="e.g. George Clooney">
|
<input id="query" class="input" type="text" autofocus placeholder="e.g. George Clooney">
|
||||||
<input id="facet" class="input" type="text" placeholder="facet filter like released >= 1577836800">
|
<input id="filters" class="input" type="text" placeholder="filters like released >= 1577836800">
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="level-item"></div>
|
<div class="level-item"></div>
|
||||||
@ -66,7 +66,7 @@
|
|||||||
<nav class="level-right">
|
<nav class="level-right">
|
||||||
<div class="level-item has-text-centered">
|
<div class="level-item has-text-centered">
|
||||||
<div>
|
<div>
|
||||||
<p class="heading">Documents</p>
|
<p class="heading">Candidates</p>
|
||||||
<p id="count" class="title">0</p>
|
<p id="count" class="title">0</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -84,6 +84,10 @@
|
|||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
|
<section id="facets">
|
||||||
|
<!-- facet values -->
|
||||||
|
</section>
|
||||||
|
|
||||||
<section>
|
<section>
|
||||||
<ol id="results" class="content">
|
<ol id="results" class="content">
|
||||||
<!-- documents matching requests -->
|
<!-- documents matching requests -->
|
||||||
|
60
src/facet/facet_value.rs
Normal file
60
src/facet/facet_value.rs
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
use ordered_float::OrderedFloat;
|
||||||
|
use serde::{Serialize, Serializer};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
|
||||||
|
pub enum FacetValue {
|
||||||
|
String(String),
|
||||||
|
Float(OrderedFloat<f64>),
|
||||||
|
Integer(i64),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<String> for FacetValue {
|
||||||
|
fn from(string: String) -> FacetValue {
|
||||||
|
FacetValue::String(string)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<&str> for FacetValue {
|
||||||
|
fn from(string: &str) -> FacetValue {
|
||||||
|
FacetValue::String(string.to_owned())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<f64> for FacetValue {
|
||||||
|
fn from(float: f64) -> FacetValue {
|
||||||
|
FacetValue::Float(OrderedFloat(float))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<OrderedFloat<f64>> for FacetValue {
|
||||||
|
fn from(float: OrderedFloat<f64>) -> FacetValue {
|
||||||
|
FacetValue::Float(float)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<i64> for FacetValue {
|
||||||
|
fn from(integer: i64) -> FacetValue {
|
||||||
|
FacetValue::Integer(integer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// We implement Serialize ourselves because we need to always serialize it as a string,
|
||||||
|
/// as JSON object keys must be strings, not numbers.
|
||||||
|
impl Serialize for FacetValue {
|
||||||
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||||
|
where
|
||||||
|
S: Serializer,
|
||||||
|
{
|
||||||
|
match self {
|
||||||
|
FacetValue::String(string) => serializer.serialize_str(string),
|
||||||
|
FacetValue::Float(float) => {
|
||||||
|
let string = float.to_string();
|
||||||
|
serializer.serialize_str(&string)
|
||||||
|
},
|
||||||
|
FacetValue::Integer(integer) => {
|
||||||
|
let string = integer.to_string();
|
||||||
|
serializer.serialize_str(&string)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,4 +1,6 @@
|
|||||||
mod facet_type;
|
mod facet_type;
|
||||||
|
mod facet_value;
|
||||||
pub mod value_encoding;
|
pub mod value_encoding;
|
||||||
|
|
||||||
pub use self::facet_type::FacetType;
|
pub use self::facet_type::FacetType;
|
||||||
|
pub use self::facet_value::FacetValue;
|
||||||
|
@ -9,7 +9,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::fields_ids_map::FieldsIdsMap;
|
use crate::fields_ids_map::FieldsIdsMap;
|
||||||
use crate::{default_criteria, Criterion, Search};
|
use crate::{default_criteria, Criterion, Search, FacetDistribution};
|
||||||
use crate::{BEU32, DocumentId, FieldId, ExternalDocumentsIds};
|
use crate::{BEU32, DocumentId, FieldId, ExternalDocumentsIds};
|
||||||
use crate::{
|
use crate::{
|
||||||
RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec, ObkvCodec,
|
RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec, ObkvCodec,
|
||||||
@ -351,6 +351,10 @@ impl Index {
|
|||||||
Ok(self.documents_ids(rtxn).map(|docids| docids.len() as usize)?)
|
Ok(self.documents_ids(rtxn).map(|docids| docids.len() as usize)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn facets_distribution<'a>(&'a self, rtxn: &'a RoTxn) -> FacetDistribution<'a> {
|
||||||
|
FacetDistribution::new(rtxn, self)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn search<'a>(&'a self, rtxn: &'a RoTxn) -> Search<'a> {
|
pub fn search<'a>(&'a self, rtxn: &'a RoTxn) -> Search<'a> {
|
||||||
Search::new(rtxn, self)
|
Search::new(rtxn, self)
|
||||||
}
|
}
|
||||||
|
@ -28,7 +28,7 @@ pub use self::fields_ids_map::FieldsIdsMap;
|
|||||||
pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, ObkvCodec};
|
pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, ObkvCodec};
|
||||||
pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
||||||
pub use self::index::Index;
|
pub use self::index::Index;
|
||||||
pub use self::search::{Search, FacetCondition, SearchResult};
|
pub use self::search::{Search, FacetDistribution, FacetCondition, SearchResult};
|
||||||
pub use self::update_store::UpdateStore;
|
pub use self::update_store::UpdateStore;
|
||||||
|
|
||||||
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
||||||
|
@ -3,6 +3,8 @@ use std::fmt::Debug;
|
|||||||
use std::ops::Bound::{self, Included, Excluded};
|
use std::ops::Bound::{self, Included, Excluded};
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use anyhow::Context;
|
||||||
|
use either::Either;
|
||||||
use heed::types::{ByteSlice, DecodeIgnore};
|
use heed::types::{ByteSlice, DecodeIgnore};
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use num_traits::Bounded;
|
use num_traits::Bounded;
|
||||||
@ -141,6 +143,85 @@ where T: FromStr,
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl FacetCondition {
|
impl FacetCondition {
|
||||||
|
pub fn from_array<I, J, A, B>(
|
||||||
|
rtxn: &heed::RoTxn,
|
||||||
|
index: &Index,
|
||||||
|
array: I,
|
||||||
|
) -> anyhow::Result<Option<FacetCondition>>
|
||||||
|
where I: IntoIterator<Item=Either<J, B>>,
|
||||||
|
J: IntoIterator<Item=A>,
|
||||||
|
A: AsRef<str>,
|
||||||
|
B: AsRef<str>,
|
||||||
|
{
|
||||||
|
fn facet_condition(
|
||||||
|
fields_ids_map: &FieldsIdsMap,
|
||||||
|
faceted_fields: &HashMap<String, FacetType>,
|
||||||
|
key: &str,
|
||||||
|
value: &str,
|
||||||
|
) -> anyhow::Result<FacetCondition>
|
||||||
|
{
|
||||||
|
let fid = fields_ids_map.id(key).with_context(|| {
|
||||||
|
format!("{:?} isn't present in the fields ids map", key)
|
||||||
|
})?;
|
||||||
|
let ftype = faceted_fields.get(key).copied().with_context(|| {
|
||||||
|
format!("{:?} isn't a faceted field", key)
|
||||||
|
})?;
|
||||||
|
let (neg, value) = match value.trim().strip_prefix('-') {
|
||||||
|
Some(value) => (true, value.trim()),
|
||||||
|
None => (false, value.trim()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let operator = match ftype {
|
||||||
|
FacetType::String => OperatorString(fid, FacetStringOperator::equal(value)),
|
||||||
|
FacetType::Float => OperatorF64(fid, FacetNumberOperator::Equal(value.parse()?)),
|
||||||
|
FacetType::Integer => OperatorI64(fid, FacetNumberOperator::Equal(value.parse()?)),
|
||||||
|
};
|
||||||
|
|
||||||
|
if neg { Ok(operator.negate()) } else { Ok(operator) }
|
||||||
|
}
|
||||||
|
|
||||||
|
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
|
let faceted_fields = index.faceted_fields(rtxn)?;
|
||||||
|
let mut ands = None;
|
||||||
|
|
||||||
|
for either in array {
|
||||||
|
match either {
|
||||||
|
Either::Left(array) => {
|
||||||
|
let mut ors = None;
|
||||||
|
for rule in array {
|
||||||
|
let mut iter = rule.as_ref().splitn(2, ':');
|
||||||
|
let key = iter.next().context("missing facet condition key")?;
|
||||||
|
let value = iter.next().context("missing facet condition value")?;
|
||||||
|
let condition = facet_condition(&fields_ids_map, &faceted_fields, key, value)?;
|
||||||
|
ors = match ors.take() {
|
||||||
|
Some(ors) => Some(Or(Box::new(ors), Box::new(condition))),
|
||||||
|
None => Some(condition),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(rule) = ors {
|
||||||
|
ands = match ands.take() {
|
||||||
|
Some(ands) => Some(And(Box::new(ands), Box::new(rule))),
|
||||||
|
None => Some(rule),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Either::Right(rule) => {
|
||||||
|
let mut iter = rule.as_ref().splitn(2, ':');
|
||||||
|
let key = iter.next().context("missing facet condition key")?;
|
||||||
|
let value = iter.next().context("missing facet condition value")?;
|
||||||
|
let condition = facet_condition(&fields_ids_map, &faceted_fields, key, value)?;
|
||||||
|
ands = match ands.take() {
|
||||||
|
Some(ands) => Some(And(Box::new(ands), Box::new(condition))),
|
||||||
|
None => Some(condition),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(ands)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn from_str(
|
pub fn from_str(
|
||||||
rtxn: &heed::RoTxn,
|
rtxn: &heed::RoTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
@ -641,4 +722,35 @@ mod tests {
|
|||||||
);
|
);
|
||||||
assert_eq!(condition, expected);
|
assert_eq!(condition, expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn from_array() {
|
||||||
|
let path = tempfile::tempdir().unwrap();
|
||||||
|
let mut options = EnvOpenOptions::new();
|
||||||
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
|
||||||
|
// Set the faceted fields to be the channel.
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index);
|
||||||
|
builder.set_searchable_fields(vec!["channel".into(), "timestamp".into()]); // to keep the fields order
|
||||||
|
builder.set_faceted_fields(hashmap!{
|
||||||
|
"channel".into() => "string".into(),
|
||||||
|
"timestamp".into() => "integer".into(),
|
||||||
|
});
|
||||||
|
builder.execute(|_| ()).unwrap();
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
// Test that the facet condition is correctly generated.
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let condition = FacetCondition::from_array(
|
||||||
|
&rtxn, &index,
|
||||||
|
vec![Either::Right("channel:gotaga"), Either::Left(vec!["timestamp:44", "channel:-ponce"])],
|
||||||
|
).unwrap().unwrap();
|
||||||
|
let expected = FacetCondition::from_str(
|
||||||
|
&rtxn, &index,
|
||||||
|
"channel = gotaga AND (timestamp = 44 OR channel != ponce)",
|
||||||
|
).unwrap();
|
||||||
|
assert_eq!(condition, expected);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
260
src/search/facet/facet_distribution.rs
Normal file
260
src/search/facet/facet_distribution.rs
Normal file
@ -0,0 +1,260 @@
|
|||||||
|
use std::collections::{HashSet, BTreeMap};
|
||||||
|
use std::ops::Bound::Unbounded;
|
||||||
|
use std::{cmp, fmt};
|
||||||
|
|
||||||
|
use anyhow::Context;
|
||||||
|
use heed::BytesDecode;
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::facet::{FacetType, FacetValue};
|
||||||
|
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec};
|
||||||
|
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec};
|
||||||
|
use crate::search::facet::{FacetIter, FacetRange};
|
||||||
|
use crate::{Index, FieldId, DocumentId};
|
||||||
|
|
||||||
|
/// The default number of values by facets that will
|
||||||
|
/// be fetched from the key-value store.
|
||||||
|
const DEFAULT_VALUES_BY_FACET: usize = 100;
|
||||||
|
|
||||||
|
/// The hard limit in the number of values by facets that will be fetched from
|
||||||
|
/// the key-value store. Searching for more values could slow down the engine.
|
||||||
|
const MAX_VALUES_BY_FACET: usize = 1000;
|
||||||
|
|
||||||
|
/// Threshold on the number of candidates that will make
|
||||||
|
/// the system choose between one algorithm or another.
|
||||||
|
const CANDIDATES_THRESHOLD: u64 = 1000;
|
||||||
|
|
||||||
|
pub struct FacetDistribution<'a> {
|
||||||
|
facets: Option<HashSet<String>>,
|
||||||
|
candidates: Option<RoaringBitmap>,
|
||||||
|
max_values_by_facet: usize,
|
||||||
|
rtxn: &'a heed::RoTxn<'a>,
|
||||||
|
index: &'a Index,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> FacetDistribution<'a> {
|
||||||
|
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> FacetDistribution<'a> {
|
||||||
|
FacetDistribution {
|
||||||
|
facets: None,
|
||||||
|
candidates: None,
|
||||||
|
max_values_by_facet: DEFAULT_VALUES_BY_FACET,
|
||||||
|
rtxn,
|
||||||
|
index,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn facets<I: IntoIterator<Item=A>, A: AsRef<str>>(&mut self, names: I) -> &mut Self {
|
||||||
|
self.facets = Some(names.into_iter().map(|s| s.as_ref().to_string()).collect());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn candidates(&mut self, candidates: RoaringBitmap) -> &mut Self {
|
||||||
|
self.candidates = Some(candidates);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn max_values_by_facet(&mut self, max: usize) -> &mut Self {
|
||||||
|
self.max_values_by_facet = cmp::min(max, MAX_VALUES_BY_FACET);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// There is a small number of candidates OR we ask for facet string values, so we
|
||||||
|
/// decide to iterate over the facet values of each one of them, one by one.
|
||||||
|
fn facet_values_from_documents(
|
||||||
|
&self,
|
||||||
|
field_id: FieldId,
|
||||||
|
facet_type: FacetType,
|
||||||
|
candidates: &RoaringBitmap,
|
||||||
|
) -> heed::Result<BTreeMap<FacetValue, u64>>
|
||||||
|
{
|
||||||
|
fn fetch_facet_values<'t, KC, K: 't>(
|
||||||
|
index: &Index,
|
||||||
|
rtxn: &'t heed::RoTxn,
|
||||||
|
field_id: FieldId,
|
||||||
|
candidates: &RoaringBitmap,
|
||||||
|
) -> heed::Result<BTreeMap<FacetValue, u64>>
|
||||||
|
where
|
||||||
|
KC: BytesDecode<'t, DItem = (FieldId, DocumentId, K)>,
|
||||||
|
K: Into<FacetValue>,
|
||||||
|
{
|
||||||
|
let mut facet_values = BTreeMap::new();
|
||||||
|
let mut key_buffer = vec![field_id];
|
||||||
|
|
||||||
|
for docid in candidates.into_iter().take(CANDIDATES_THRESHOLD as usize) {
|
||||||
|
key_buffer.truncate(1);
|
||||||
|
key_buffer.extend_from_slice(&docid.to_be_bytes());
|
||||||
|
let iter = index.field_id_docid_facet_values
|
||||||
|
.prefix_iter(rtxn, &key_buffer)?
|
||||||
|
.remap_key_type::<KC>();
|
||||||
|
|
||||||
|
for result in iter {
|
||||||
|
let ((_, _, value), ()) = result?;
|
||||||
|
*facet_values.entry(value.into()).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(facet_values)
|
||||||
|
}
|
||||||
|
|
||||||
|
let index = self.index;
|
||||||
|
let rtxn = self.rtxn;
|
||||||
|
match facet_type {
|
||||||
|
FacetType::String => {
|
||||||
|
fetch_facet_values::<FieldDocIdFacetStringCodec, _>(index, rtxn, field_id, candidates)
|
||||||
|
},
|
||||||
|
FacetType::Float => {
|
||||||
|
fetch_facet_values::<FieldDocIdFacetF64Codec, _>(index, rtxn, field_id, candidates)
|
||||||
|
},
|
||||||
|
FacetType::Integer => {
|
||||||
|
fetch_facet_values::<FieldDocIdFacetI64Codec, _>(index, rtxn, field_id, candidates)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// There are too many documents, so we use the facet levels to move through
|
||||||
|
/// the facet values, to find the candidates and values associated.
|
||||||
|
fn facet_values_from_facet_levels(
|
||||||
|
&self,
|
||||||
|
field_id: FieldId,
|
||||||
|
facet_type: FacetType,
|
||||||
|
candidates: &RoaringBitmap,
|
||||||
|
) -> heed::Result<BTreeMap<FacetValue, u64>>
|
||||||
|
{
|
||||||
|
let iter = match facet_type {
|
||||||
|
FacetType::String => unreachable!(),
|
||||||
|
FacetType::Float => {
|
||||||
|
let iter = FacetIter::<f64, FacetLevelValueF64Codec>::new_non_reducing(
|
||||||
|
self.rtxn, self.index, field_id, candidates.clone(),
|
||||||
|
)?;
|
||||||
|
let iter = iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids)));
|
||||||
|
Box::new(iter) as Box::<dyn Iterator<Item=_>>
|
||||||
|
},
|
||||||
|
FacetType::Integer => {
|
||||||
|
let iter = FacetIter::<i64, FacetLevelValueI64Codec>::new_non_reducing(
|
||||||
|
self.rtxn, self.index, field_id, candidates.clone(),
|
||||||
|
)?;
|
||||||
|
Box::new(iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids))))
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut facet_values = BTreeMap::new();
|
||||||
|
for result in iter {
|
||||||
|
let (value, mut docids) = result?;
|
||||||
|
docids.intersect_with(candidates);
|
||||||
|
if !docids.is_empty() {
|
||||||
|
facet_values.insert(value, docids.len());
|
||||||
|
}
|
||||||
|
if facet_values.len() == self.max_values_by_facet {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(facet_values)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Placeholder search, i.e. no candidates were specified. We iterate through the
|
||||||
|
/// facet values one by one and iterate on the facet level 0 for numbers.
|
||||||
|
fn facet_values_from_raw_facet_database(
|
||||||
|
&self,
|
||||||
|
field_id: FieldId,
|
||||||
|
facet_type: FacetType,
|
||||||
|
) -> heed::Result<BTreeMap<FacetValue, u64>>
|
||||||
|
{
|
||||||
|
let db = self.index.facet_field_id_value_docids;
|
||||||
|
let level = 0;
|
||||||
|
let iter = match facet_type {
|
||||||
|
FacetType::String => {
|
||||||
|
let iter = db
|
||||||
|
.prefix_iter(self.rtxn, &[field_id])?
|
||||||
|
.remap_key_type::<FacetValueStringCodec>()
|
||||||
|
.map(|r| r.map(|((_, v), docids)| (FacetValue::from(v), docids)));
|
||||||
|
Box::new(iter) as Box::<dyn Iterator<Item=_>>
|
||||||
|
},
|
||||||
|
FacetType::Float => {
|
||||||
|
let db = db.remap_key_type::<FacetLevelValueF64Codec>();
|
||||||
|
let range = FacetRange::<f64, _>::new(
|
||||||
|
self.rtxn, db, field_id, level, Unbounded, Unbounded,
|
||||||
|
)?;
|
||||||
|
Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (FacetValue::from(v), docids))))
|
||||||
|
},
|
||||||
|
FacetType::Integer => {
|
||||||
|
let db = db.remap_key_type::<FacetLevelValueI64Codec>();
|
||||||
|
let range = FacetRange::<i64, _>::new(
|
||||||
|
self.rtxn, db, field_id, level, Unbounded, Unbounded,
|
||||||
|
)?;
|
||||||
|
Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (FacetValue::from(v), docids))))
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut facet_values = BTreeMap::new();
|
||||||
|
for result in iter {
|
||||||
|
let (value, docids) = result?;
|
||||||
|
facet_values.insert(value, docids.len());
|
||||||
|
if facet_values.len() == self.max_values_by_facet {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(facet_values)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn facet_values(
|
||||||
|
&self,
|
||||||
|
field_id: FieldId,
|
||||||
|
facet_type: FacetType,
|
||||||
|
) -> heed::Result<BTreeMap<FacetValue, u64>>
|
||||||
|
{
|
||||||
|
if let Some(candidates) = self.candidates.as_ref() {
|
||||||
|
// Classic search, candidates were specified, we must return facet values only related
|
||||||
|
// to those candidates. We also enter here for facet strings for performance reasons.
|
||||||
|
if candidates.len() <= CANDIDATES_THRESHOLD || facet_type == FacetType::String {
|
||||||
|
self.facet_values_from_documents(field_id, facet_type, candidates)
|
||||||
|
} else {
|
||||||
|
self.facet_values_from_facet_levels(field_id, facet_type, candidates)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
self.facet_values_from_raw_facet_database(field_id, facet_type)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Computes the facet distribution of every selected faceted field.
///
/// When `self.facets` is set, only the listed names that are actually faceted are
/// kept; otherwise all the faceted fields of the index are used. The result maps
/// each field name to its own `facet value -> documents count` map.
///
/// Fails if a selected field name is missing from the fields ids map, or if any
/// read of the index fails.
pub fn execute(&self) -> anyhow::Result<BTreeMap<String, BTreeMap<FacetValue, u64>>> {
    let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
    let faceted_fields = self.index.faceted_fields(self.rtxn)?;

    let selected: Vec<_> = if let Some(names) = &self.facets {
        // Names that do not correspond to a faceted field are silently skipped.
        names
            .iter()
            .filter_map(|name| faceted_fields.get(name).map(|ftype| (name.to_string(), *ftype)))
            .collect()
    } else {
        faceted_fields.into_iter().collect()
    };

    // Collecting into a `Result` stops at the first failing field.
    selected
        .into_iter()
        .map(|(name, ftype)| -> anyhow::Result<(String, BTreeMap<FacetValue, u64>)> {
            let fid = fields_ids_map.id(&name).with_context(|| {
                format!("missing field name {:?} from the fields id map", name)
            })?;
            let values = self.facet_values(fid, ftype)?;
            Ok((name, values))
        })
        .collect()
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for FacetDistribution<'_> {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
let FacetDistribution {
|
||||||
|
facets,
|
||||||
|
candidates,
|
||||||
|
max_values_by_facet,
|
||||||
|
rtxn: _,
|
||||||
|
index: _,
|
||||||
|
} = self;
|
||||||
|
|
||||||
|
f.debug_struct("FacetDistribution")
|
||||||
|
.field("facets", facets)
|
||||||
|
.field("candidates", candidates)
|
||||||
|
.field("max_values_by_facet", max_values_by_facet)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
@ -13,11 +13,13 @@ use crate::heed_codec::CboRoaringBitmapCodec;
|
|||||||
use crate::{Index, FieldId};
|
use crate::{Index, FieldId};
|
||||||
|
|
||||||
pub use self::facet_condition::{FacetCondition, FacetNumberOperator, FacetStringOperator};
|
pub use self::facet_condition::{FacetCondition, FacetNumberOperator, FacetStringOperator};
|
||||||
|
pub use self::facet_distribution::FacetDistribution;
|
||||||
|
|
||||||
mod facet_condition;
|
mod facet_condition;
|
||||||
|
mod facet_distribution;
|
||||||
mod parser;
|
mod parser;
|
||||||
|
|
||||||
struct FacetRange<'t, T: 't, KC> {
|
pub struct FacetRange<'t, T: 't, KC> {
|
||||||
iter: RoRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>,
|
iter: RoRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>,
|
||||||
end: Bound<T>,
|
end: Bound<T>,
|
||||||
}
|
}
|
||||||
@ -27,7 +29,7 @@ where
|
|||||||
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
||||||
T: PartialOrd + Copy + Bounded,
|
T: PartialOrd + Copy + Bounded,
|
||||||
{
|
{
|
||||||
fn new(
|
pub fn new(
|
||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t heed::RoTxn,
|
||||||
db: Database<KC, CboRoaringBitmapCodec>,
|
db: Database<KC, CboRoaringBitmapCodec>,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
@ -78,7 +80,7 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct FacetRevRange<'t, T: 't, KC> {
|
pub struct FacetRevRange<'t, T: 't, KC> {
|
||||||
iter: RoRevRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>,
|
iter: RoRevRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>,
|
||||||
end: Bound<T>,
|
end: Bound<T>,
|
||||||
}
|
}
|
||||||
@ -88,7 +90,7 @@ where
|
|||||||
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
||||||
T: PartialOrd + Copy + Bounded,
|
T: PartialOrd + Copy + Bounded,
|
||||||
{
|
{
|
||||||
fn new(
|
pub fn new(
|
||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t heed::RoTxn,
|
||||||
db: Database<KC, CboRoaringBitmapCodec>,
|
db: Database<KC, CboRoaringBitmapCodec>,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
@ -145,6 +147,7 @@ pub struct FacetIter<'t, T: 't, KC> {
|
|||||||
db: Database<KC, CboRoaringBitmapCodec>,
|
db: Database<KC, CboRoaringBitmapCodec>,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
level_iters: Vec<(RoaringBitmap, Either<FacetRange<'t, T, KC>, FacetRevRange<'t, T, KC>>)>,
|
level_iters: Vec<(RoaringBitmap, Either<FacetRange<'t, T, KC>, FacetRevRange<'t, T, KC>>)>,
|
||||||
|
must_reduce: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t, T, KC> FacetIter<'t, T, KC>
|
impl<'t, T, KC> FacetIter<'t, T, KC>
|
||||||
@ -153,7 +156,10 @@ where
|
|||||||
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
||||||
T: PartialOrd + Copy + Bounded,
|
T: PartialOrd + Copy + Bounded,
|
||||||
{
|
{
|
||||||
pub fn new(
|
/// Create a `FacetIter` that will iterate on the different facet entries
|
||||||
|
/// (facet value + documents ids) and that will reduce the given documents ids
|
||||||
|
/// while iterating on the different facet levels.
|
||||||
|
pub fn new_reducing(
|
||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t heed::RoTxn,
|
||||||
index: &'t Index,
|
index: &'t Index,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
@ -163,10 +169,14 @@ where
|
|||||||
let db = index.facet_field_id_value_docids.remap_key_type::<KC>();
|
let db = index.facet_field_id_value_docids.remap_key_type::<KC>();
|
||||||
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||||
let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||||
Ok(FacetIter { rtxn, db, field_id, level_iters: vec![(documents_ids, Left(highest_iter))] })
|
let level_iters = vec![(documents_ids, Left(highest_iter))];
|
||||||
|
Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: true })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new_reverse(
|
/// Create a `FacetIter` that will iterate on the different facet entries in reverse
|
||||||
|
/// (facet value + documents ids) and that will reduce the given documents ids
|
||||||
|
/// while iterating on the different facet levels.
|
||||||
|
pub fn new_reverse_reducing(
|
||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t heed::RoTxn,
|
||||||
index: &'t Index,
|
index: &'t Index,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
@ -176,7 +186,26 @@ where
|
|||||||
let db = index.facet_field_id_value_docids.remap_key_type::<KC>();
|
let db = index.facet_field_id_value_docids.remap_key_type::<KC>();
|
||||||
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||||
let highest_iter = FacetRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
let highest_iter = FacetRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||||
Ok(FacetIter { rtxn, db, field_id, level_iters: vec![(documents_ids, Right(highest_iter))] })
|
let level_iters = vec![(documents_ids, Right(highest_iter))];
|
||||||
|
Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: true })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a `FacetIter` that will iterate on the different facet entries
|
||||||
|
/// (facet value + documents ids) and that will not reduce the given documents ids
|
||||||
|
/// while iterating on the different facet levels, possibly returning multiple times
|
||||||
|
/// a document id associated with multiple facet values.
|
||||||
|
pub fn new_non_reducing(
|
||||||
|
rtxn: &'t heed::RoTxn,
|
||||||
|
index: &'t Index,
|
||||||
|
field_id: FieldId,
|
||||||
|
documents_ids: RoaringBitmap,
|
||||||
|
) -> heed::Result<FacetIter<'t, T, KC>>
|
||||||
|
{
|
||||||
|
let db = index.facet_field_id_value_docids.remap_key_type::<KC>();
|
||||||
|
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||||
|
let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||||
|
let level_iters = vec![(documents_ids, Left(highest_iter))];
|
||||||
|
Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: false })
|
||||||
}
|
}
|
||||||
|
|
||||||
fn highest_level<X>(rtxn: &'t heed::RoTxn, db: Database<KC, X>, fid: FieldId) -> heed::Result<Option<u8>> {
|
fn highest_level<X>(rtxn: &'t heed::RoTxn, db: Database<KC, X>, fid: FieldId) -> heed::Result<Option<u8>> {
|
||||||
@ -214,7 +243,9 @@ where
|
|||||||
|
|
||||||
docids.intersect_with(&documents_ids);
|
docids.intersect_with(&documents_ids);
|
||||||
if !docids.is_empty() {
|
if !docids.is_empty() {
|
||||||
documents_ids.difference_with(&docids);
|
if self.must_reduce {
|
||||||
|
documents_ids.difference_with(&docids);
|
||||||
|
}
|
||||||
|
|
||||||
if level == 0 {
|
if level == 0 {
|
||||||
debug!("found {:?} at {:?}", docids, left);
|
debug!("found {:?} at {:?}", docids, left);
|
||||||
|
@ -20,7 +20,7 @@ use crate::mdfs::Mdfs;
|
|||||||
use crate::query_tokens::{query_tokens, QueryToken};
|
use crate::query_tokens::{query_tokens, QueryToken};
|
||||||
use crate::{Index, FieldId, DocumentId, Criterion};
|
use crate::{Index, FieldId, DocumentId, Criterion};
|
||||||
|
|
||||||
pub use self::facet::{FacetCondition, FacetNumberOperator, FacetStringOperator};
|
pub use self::facet::{FacetCondition, FacetDistribution, FacetNumberOperator, FacetStringOperator};
|
||||||
pub use self::facet::{FacetIter};
|
pub use self::facet::{FacetIter};
|
||||||
|
|
||||||
// Building these factories is not free.
|
// Building these factories is not free.
|
||||||
@ -189,9 +189,9 @@ impl<'a> Search<'a> {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let facet_fn = if ascending {
|
let facet_fn = if ascending {
|
||||||
FacetIter::<f64, FacetLevelValueF64Codec>::new
|
FacetIter::<f64, FacetLevelValueF64Codec>::new_reducing
|
||||||
} else {
|
} else {
|
||||||
FacetIter::<f64, FacetLevelValueF64Codec>::new_reverse
|
FacetIter::<f64, FacetLevelValueF64Codec>::new_reverse_reducing
|
||||||
};
|
};
|
||||||
let mut limit_tmp = limit;
|
let mut limit_tmp = limit;
|
||||||
let mut output = Vec::new();
|
let mut output = Vec::new();
|
||||||
@ -226,9 +226,9 @@ impl<'a> Search<'a> {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let facet_fn = if ascending {
|
let facet_fn = if ascending {
|
||||||
FacetIter::<i64, FacetLevelValueI64Codec>::new
|
FacetIter::<i64, FacetLevelValueI64Codec>::new_reducing
|
||||||
} else {
|
} else {
|
||||||
FacetIter::<i64, FacetLevelValueI64Codec>::new_reverse
|
FacetIter::<i64, FacetLevelValueI64Codec>::new_reverse_reducing
|
||||||
};
|
};
|
||||||
let mut limit_tmp = limit;
|
let mut limit_tmp = limit;
|
||||||
let mut output = Vec::new();
|
let mut output = Vec::new();
|
||||||
@ -313,22 +313,26 @@ impl<'a> Search<'a> {
|
|||||||
// there is some facet conditions we return a placeholder.
|
// there is some facet conditions we return a placeholder.
|
||||||
let documents_ids = match order_by_facet {
|
let documents_ids = match order_by_facet {
|
||||||
Some((fid, ftype, is_ascending)) => {
|
Some((fid, ftype, is_ascending)) => {
|
||||||
self.facet_ordered(fid, ftype, is_ascending, facet_candidates, limit)?
|
self.facet_ordered(fid, ftype, is_ascending, facet_candidates.clone(), limit)?
|
||||||
},
|
},
|
||||||
None => facet_candidates.iter().take(limit).collect(),
|
None => facet_candidates.iter().take(limit).collect(),
|
||||||
};
|
};
|
||||||
return Ok(SearchResult { documents_ids, ..Default::default() })
|
return Ok(SearchResult {
|
||||||
|
documents_ids,
|
||||||
|
candidates: facet_candidates,
|
||||||
|
..Default::default()
|
||||||
|
})
|
||||||
},
|
},
|
||||||
(None, None) => {
|
(None, None) => {
|
||||||
// If the query is not set or results in no DFAs we return a placeholder.
|
// If the query is not set or results in no DFAs we return a placeholder.
|
||||||
let documents_ids = self.index.documents_ids(self.rtxn)?;
|
let all_docids = self.index.documents_ids(self.rtxn)?;
|
||||||
let documents_ids = match order_by_facet {
|
let documents_ids = match order_by_facet {
|
||||||
Some((fid, ftype, is_ascending)) => {
|
Some((fid, ftype, is_ascending)) => {
|
||||||
self.facet_ordered(fid, ftype, is_ascending, documents_ids, limit)?
|
self.facet_ordered(fid, ftype, is_ascending, all_docids.clone(), limit)?
|
||||||
},
|
},
|
||||||
None => documents_ids.iter().take(limit).collect(),
|
None => all_docids.iter().take(limit).collect(),
|
||||||
};
|
};
|
||||||
return Ok(SearchResult { documents_ids, ..Default::default() })
|
return Ok(SearchResult { documents_ids, candidates: all_docids,..Default::default() })
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -336,7 +340,7 @@ impl<'a> Search<'a> {
|
|||||||
|
|
||||||
// The mana depth first search is a revised DFS that explore
|
// The mana depth first search is a revised DFS that explore
|
||||||
// solutions in the order of their proximities.
|
// solutions in the order of their proximities.
|
||||||
let mut mdfs = Mdfs::new(self.index, self.rtxn, &derived_words, candidates);
|
let mut mdfs = Mdfs::new(self.index, self.rtxn, &derived_words, candidates.clone());
|
||||||
let mut documents = Vec::new();
|
let mut documents = Vec::new();
|
||||||
|
|
||||||
// We execute the Mdfs iterator until we find enough documents.
|
// We execute the Mdfs iterator until we find enough documents.
|
||||||
@ -364,7 +368,7 @@ impl<'a> Search<'a> {
|
|||||||
None => documents.into_iter().flatten().take(limit).collect(),
|
None => documents.into_iter().flatten().take(limit).collect(),
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(SearchResult { found_words, documents_ids })
|
Ok(SearchResult { found_words, candidates, documents_ids })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -383,6 +387,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct SearchResult {
|
pub struct SearchResult {
|
||||||
pub found_words: HashSet<String>,
|
pub found_words: HashSet<String>,
|
||||||
|
pub candidates: RoaringBitmap,
|
||||||
// TODO those documents ids should be associated with their criteria scores.
|
// TODO those documents ids should be associated with their criteria scores.
|
||||||
pub documents_ids: Vec<DocumentId>,
|
pub documents_ids: Vec<DocumentId>,
|
||||||
}
|
}
|
||||||
|
@ -29,6 +29,10 @@ pub struct Opt {
|
|||||||
|
|
||||||
/// The query string to search for (doesn't support prefix search yet).
|
/// The query string to search for (doesn't support prefix search yet).
|
||||||
query: Option<String>,
|
query: Option<String>,
|
||||||
|
|
||||||
|
/// Compute and print the facet distribution of all the faceted fields.
|
||||||
|
#[structopt(long)]
|
||||||
|
print_facet_distribution: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn run(opt: Opt) -> anyhow::Result<()> {
|
pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||||
@ -71,6 +75,12 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
|
|||||||
let _ = writeln!(&mut stdout);
|
let _ = writeln!(&mut stdout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if opt.print_facet_distribution {
|
||||||
|
let facets = index.facets_distribution(&rtxn).candidates(result.candidates).execute()?;
|
||||||
|
serde_json::to_writer(&mut stdout, &facets)?;
|
||||||
|
let _ = writeln!(&mut stdout);
|
||||||
|
}
|
||||||
|
|
||||||
debug!("Took {:.02?} to find {} documents", before.elapsed(), result.documents_ids.len());
|
debug!("Took {:.02?} to find {} documents", before.elapsed(), result.documents_ids.len());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user