mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 08:44:27 +01:00
Merge pull request #40 from meilisearch/asc-desc-faceted-fields
Ascending and descending custom ranking
This commit is contained in:
commit
9628da2d17
61
Cargo.lock
generated
61
Cargo.lock
generated
@ -6,6 +6,15 @@ version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ccc9a9dd069569f212bc4330af9f17c4afb5e8ce185e83dbb14f1349dda18b10"
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.7.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.31"
|
||||
@ -345,6 +354,16 @@ dependencies = [
|
||||
"miniz_oxide",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "form_urlencoded"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ece68d15c92e84fa4f19d3780f1294e5ca82a78a6d515f1efaabcc144688be00"
|
||||
dependencies = [
|
||||
"matches",
|
||||
"percent-encoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fs_extra"
|
||||
version = "1.1.0"
|
||||
@ -429,9 +448,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "heed"
|
||||
version = "0.10.4"
|
||||
version = "0.10.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cddc0d0d20adfc803b3e57c2d84447e134cad636202e68e275c65e3cbe63c616"
|
||||
checksum = "2eaba3b0edee6a9cd551f24caca2027922b03259f7203a15f0b86af4c1348fcc"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"heed-traits",
|
||||
@ -453,9 +472,9 @@ checksum = "b328f6260a7e51bdb0ca6b68e6ea27ee3d11fba5dee930896ee7ff6ad5fc072c"
|
||||
|
||||
[[package]]
|
||||
name = "heed-types"
|
||||
version = "0.7.1"
|
||||
version = "0.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72fc61caee13e85ea330eabf0c6c7098c511ff173bcb57a760b1eda3bba9f6eb"
|
||||
checksum = "e628efb08beaee58355f80dc4adba79d644940ea9eef60175ea17dc218aab405"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"heed-traits",
|
||||
@ -571,9 +590,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.79"
|
||||
version = "0.2.80"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2448f6066e80e3bfc792e9c98bf705b4b0fc6e8ef5b43e5889aff0eaa9c58743"
|
||||
checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614"
|
||||
|
||||
[[package]]
|
||||
name = "linked-hash-map"
|
||||
@ -654,6 +673,7 @@ dependencies = [
|
||||
"criterion",
|
||||
"crossbeam-channel",
|
||||
"csv",
|
||||
"either",
|
||||
"flate2",
|
||||
"fst",
|
||||
"fxhash",
|
||||
@ -675,6 +695,7 @@ dependencies = [
|
||||
"pest 2.1.3 (git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67)",
|
||||
"pest_derive",
|
||||
"rayon",
|
||||
"regex",
|
||||
"ringtail",
|
||||
"roaring",
|
||||
"serde",
|
||||
@ -760,9 +781,9 @@ checksum = "ce30a214135d83e7250f2e8fad781f7cb987e3a3f1b4529712d891594bda311c"
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.4.0"
|
||||
version = "1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b631f7e854af39a1739f401cf34a8a013dfe09eac4fa4dba91e9768bd28168d"
|
||||
checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0"
|
||||
|
||||
[[package]]
|
||||
name = "oorandom"
|
||||
@ -1003,11 +1024,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.4.1"
|
||||
version = "1.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8963b85b8ce3074fecffde43b4b0dded83ce2f367dc8d363afc56679f3ee820b"
|
||||
checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
"thread_local",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1021,9 +1045,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.20"
|
||||
version = "0.6.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8cab7a364d15cde1e505267766a2d3c4e22a843e1a601f0fa7564c0f82ced11c"
|
||||
checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189"
|
||||
|
||||
[[package]]
|
||||
name = "remove_dir_all"
|
||||
@ -1096,9 +1120,9 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.110"
|
||||
version = "1.0.117"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "99e7b308464d16b56eba9964e4972a3eee817760ab60d88c3f86e1fecb08204c"
|
||||
checksum = "b88fa983de7720629c9387e9f517353ed404164b1e482c970a90c1a4aaf7dc1a"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
@ -1115,9 +1139,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.110"
|
||||
version = "1.0.117"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "818fbf6bfa9a42d3bfcaca148547aa00c7b915bec71d1757aa2d44ca68771984"
|
||||
checksum = "cbd1ae72adb44aab48f325a02444a5fc079349a8d804c1fc922aed3f7454c74e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@ -1406,10 +1430,11 @@ checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
|
||||
|
||||
[[package]]
|
||||
name = "url"
|
||||
version = "2.1.1"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "829d4a8476c35c9bf0bbce5a3b23f4106f79728039b726d292bb93bc106787cb"
|
||||
checksum = "5909f2b0817350449ed73e8bcd81c8c3c8d9a7a5d8acba4b27db277f1868976e"
|
||||
dependencies = [
|
||||
"form_urlencoded",
|
||||
"idna",
|
||||
"matches",
|
||||
"percent-encoding",
|
||||
|
@ -10,11 +10,12 @@ bstr = "0.2.13"
|
||||
byteorder = "1.3.4"
|
||||
crossbeam-channel = "0.5.0"
|
||||
csv = "1.1.3"
|
||||
either = "1.6.1"
|
||||
flate2 = "1.0.17"
|
||||
fst = "0.4.4"
|
||||
fxhash = "0.2.1"
|
||||
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3eb7ad9" }
|
||||
heed = { version = "0.10.4", default-features = false, features = ["lmdb", "sync-read-txn"] }
|
||||
heed = { version = "0.10.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
|
||||
human_format = "1.0.3"
|
||||
jemallocator = "0.3.2"
|
||||
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
||||
@ -26,6 +27,7 @@ obkv = "0.1.0"
|
||||
once_cell = "1.4.0"
|
||||
ordered-float = "2.0.0"
|
||||
rayon = "1.3.1"
|
||||
regex = "1.4.2"
|
||||
ringtail = "0.3.0"
|
||||
roaring = "0.6.1"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
|
56
http-ui/Cargo.lock
generated
56
http-ui/Cargo.lock
generated
@ -6,6 +6,15 @@ version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e"
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.7.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.34"
|
||||
@ -404,6 +413,16 @@ version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "form_urlencoded"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ece68d15c92e84fa4f19d3780f1294e5ca82a78a6d515f1efaabcc144688be00"
|
||||
dependencies = [
|
||||
"matches",
|
||||
"percent-encoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fs_extra"
|
||||
version = "1.2.0"
|
||||
@ -654,9 +673,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "heed"
|
||||
version = "0.10.4"
|
||||
version = "0.10.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cddc0d0d20adfc803b3e57c2d84447e134cad636202e68e275c65e3cbe63c616"
|
||||
checksum = "2eaba3b0edee6a9cd551f24caca2027922b03259f7203a15f0b86af4c1348fcc"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"heed-traits",
|
||||
@ -678,9 +697,9 @@ checksum = "b328f6260a7e51bdb0ca6b68e6ea27ee3d11fba5dee930896ee7ff6ad5fc072c"
|
||||
|
||||
[[package]]
|
||||
name = "heed-types"
|
||||
version = "0.7.1"
|
||||
version = "0.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72fc61caee13e85ea330eabf0c6c7098c511ff173bcb57a760b1eda3bba9f6eb"
|
||||
checksum = "e628efb08beaee58355f80dc4adba79d644940ea9eef60175ea17dc218aab405"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"heed-traits",
|
||||
@ -980,6 +999,7 @@ dependencies = [
|
||||
"byteorder",
|
||||
"crossbeam-channel",
|
||||
"csv",
|
||||
"either",
|
||||
"flate2",
|
||||
"fst",
|
||||
"fxhash",
|
||||
@ -1000,6 +1020,7 @@ dependencies = [
|
||||
"pest 2.1.3 (git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67)",
|
||||
"pest_derive",
|
||||
"rayon",
|
||||
"regex",
|
||||
"ringtail",
|
||||
"roaring",
|
||||
"serde",
|
||||
@ -1199,9 +1220,9 @@ checksum = "ddd8a5a0aa2f3adafe349259a5b3e21a19c388b792414c1161d60a69c1fa48e8"
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.4.1"
|
||||
version = "1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "260e51e7efe62b592207e9e13a68e43692a7a279171d6ba57abd208bf23645ad"
|
||||
checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0"
|
||||
|
||||
[[package]]
|
||||
name = "opaque-debug"
|
||||
@ -1602,6 +1623,18 @@ version = "0.1.57"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
"thread_local",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.1.9"
|
||||
@ -1611,6 +1644,12 @@ dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189"
|
||||
|
||||
[[package]]
|
||||
name = "remove_dir_all"
|
||||
version = "0.5.3"
|
||||
@ -2137,10 +2176,11 @@ checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
|
||||
|
||||
[[package]]
|
||||
name = "url"
|
||||
version = "2.1.1"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "829d4a8476c35c9bf0bbce5a3b23f4106f79728039b726d292bb93bc106787cb"
|
||||
checksum = "5909f2b0817350449ed73e8bcd81c8c3c8d9a7a5d8acba4b27db277f1868976e"
|
||||
dependencies = [
|
||||
"form_urlencoded",
|
||||
"idna",
|
||||
"matches",
|
||||
"percent-encoding",
|
||||
|
@ -8,7 +8,7 @@ edition = "2018"
|
||||
[dependencies]
|
||||
anyhow = "1.0.28"
|
||||
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3eb7ad9" }
|
||||
heed = "0.10.4"
|
||||
heed = "0.10.5"
|
||||
memmap = "0.7.0"
|
||||
milli = { path = ".." }
|
||||
once_cell = "1.4.1"
|
||||
|
@ -243,6 +243,13 @@ struct Settings {
|
||||
|
||||
#[serde(default)]
|
||||
faceted_attributes: Option<HashMap<String, String>>,
|
||||
|
||||
#[serde(
|
||||
default,
|
||||
deserialize_with = "deserialize_some",
|
||||
skip_serializing_if = "Option::is_none",
|
||||
)]
|
||||
criteria: Option<Option<Vec<String>>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@ -399,6 +406,14 @@ async fn main() -> anyhow::Result<()> {
|
||||
builder.set_faceted_fields(facet_types);
|
||||
}
|
||||
|
||||
// We transpose the settings JSON struct into a real setting update.
|
||||
if let Some(criteria) = settings.criteria {
|
||||
match criteria {
|
||||
Some(criteria) => builder.set_criteria(criteria),
|
||||
None => builder.reset_criteria(),
|
||||
}
|
||||
}
|
||||
|
||||
let result = builder.execute(|indexing_step| {
|
||||
let (current, total) = match indexing_step {
|
||||
TransformFromUserIntoGenericFormat { documents_seen } => (documents_seen, None),
|
||||
|
@ -1,3 +1,10 @@
|
||||
use crate::{FieldsIdsMap, FieldId};
|
||||
|
||||
use anyhow::{Context, bail};
|
||||
use regex::Regex;
|
||||
use serde::{Serialize, Deserialize};
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq, Eq)]
|
||||
pub enum Criterion {
|
||||
/// Sorted by increasing number of typos.
|
||||
Typo,
|
||||
@ -14,9 +21,41 @@ pub enum Criterion {
|
||||
/// Sorted by the similarity of the matched words with the query words.
|
||||
Exactness,
|
||||
/// Sorted by the increasing value of the field specified.
|
||||
CustomAsc(String),
|
||||
Asc(FieldId),
|
||||
/// Sorted by the decreasing value of the field specified.
|
||||
CustomDesc(String),
|
||||
Desc(FieldId),
|
||||
}
|
||||
|
||||
impl Criterion {
|
||||
pub fn from_str(fields_ids_map: &mut FieldsIdsMap, txt: &str) -> anyhow::Result<Criterion> {
|
||||
match txt {
|
||||
"typo" => Ok(Criterion::Typo),
|
||||
"words" => Ok(Criterion::Words),
|
||||
"proximity" => Ok(Criterion::Proximity),
|
||||
"attribute" => Ok(Criterion::Attribute),
|
||||
"wordsposition" => Ok(Criterion::WordsPosition),
|
||||
"exactness" => Ok(Criterion::Exactness),
|
||||
text => {
|
||||
let re = Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#)?;
|
||||
let caps = re.captures(text).with_context(|| format!("unknown criterion name: {}", text))?;
|
||||
let order = caps.get(1).unwrap().as_str();
|
||||
let field_name = caps.get(2).unwrap().as_str();
|
||||
let field_id = fields_ids_map.insert(field_name).context("field id limit reached")?;
|
||||
match order {
|
||||
"asc" => Ok(Criterion::Asc(field_id)),
|
||||
"desc" => Ok(Criterion::Desc(field_id)),
|
||||
otherwise => bail!("unknown criterion name: {}", otherwise),
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn field_id(&self) -> Option<FieldId> {
|
||||
match *self {
|
||||
Criterion::Asc(fid) | Criterion::Desc(fid) => Some(fid),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn default_criteria() -> Vec<Criterion> {
|
||||
|
@ -1,11 +1,12 @@
|
||||
use std::collections::BTreeMap;
|
||||
use serde::{Serialize, Deserialize};
|
||||
use crate::FieldId;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FieldsIdsMap {
|
||||
names_ids: BTreeMap<String, u8>,
|
||||
ids_names: BTreeMap<u8, String>,
|
||||
next_id: Option<u8>,
|
||||
names_ids: BTreeMap<String, FieldId>,
|
||||
ids_names: BTreeMap<FieldId, String>,
|
||||
next_id: Option<FieldId>,
|
||||
}
|
||||
|
||||
impl FieldsIdsMap {
|
||||
@ -29,7 +30,7 @@ impl FieldsIdsMap {
|
||||
|
||||
/// Returns the field id related to a field name, it will create a new field id if the
|
||||
/// name is not already known. Returns `None` if the maximum field id as been reached.
|
||||
pub fn insert(&mut self, name: &str) -> Option<u8> {
|
||||
pub fn insert(&mut self, name: &str) -> Option<FieldId> {
|
||||
match self.names_ids.get(name) {
|
||||
Some(id) => Some(*id),
|
||||
None => {
|
||||
@ -43,17 +44,17 @@ impl FieldsIdsMap {
|
||||
}
|
||||
|
||||
/// Get the id of a field based on its name.
|
||||
pub fn id(&self, name: &str) -> Option<u8> {
|
||||
pub fn id(&self, name: &str) -> Option<FieldId> {
|
||||
self.names_ids.get(name).copied()
|
||||
}
|
||||
|
||||
/// Get the name of a field based on its id.
|
||||
pub fn name(&self, id: u8) -> Option<&str> {
|
||||
pub fn name(&self, id: FieldId) -> Option<&str> {
|
||||
self.ids_names.get(&id).map(String::as_str)
|
||||
}
|
||||
|
||||
/// Remove a field name and id based on its name.
|
||||
pub fn remove(&mut self, name: &str) -> Option<u8> {
|
||||
pub fn remove(&mut self, name: &str) -> Option<FieldId> {
|
||||
match self.names_ids.remove(name) {
|
||||
Some(id) => self.ids_names.remove_entry(&id).map(|(id, _)| id),
|
||||
None => None,
|
||||
@ -61,7 +62,7 @@ impl FieldsIdsMap {
|
||||
}
|
||||
|
||||
/// Iterate over the ids and names in the ids order.
|
||||
pub fn iter(&self) -> impl Iterator<Item=(u8, &str)> {
|
||||
pub fn iter(&self) -> impl Iterator<Item=(FieldId, &str)> {
|
||||
self.ids_names.iter().map(|(id, name)| (*id, name.as_str()))
|
||||
}
|
||||
}
|
||||
|
@ -2,12 +2,13 @@ use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::facet::value_encoding::f64_into_bytes;
|
||||
use crate::FieldId;
|
||||
|
||||
// TODO do not de/serialize right bound when level = 0
|
||||
pub struct FacetLevelValueF64Codec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FacetLevelValueF64Codec {
|
||||
type DItem = (u8, u8, f64, f64);
|
||||
type DItem = (FieldId, u8, f64, f64);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id, bytes) = bytes.split_first()?;
|
||||
@ -27,7 +28,7 @@ impl<'a> heed::BytesDecode<'a> for FacetLevelValueF64Codec {
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for FacetLevelValueF64Codec {
|
||||
type EItem = (u8, u8, f64, f64);
|
||||
type EItem = (FieldId, u8, f64, f64);
|
||||
|
||||
fn bytes_encode((field_id, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut buffer = [0u8; 32];
|
||||
|
@ -2,11 +2,12 @@ use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::facet::value_encoding::{i64_from_bytes, i64_into_bytes};
|
||||
use crate::FieldId;
|
||||
|
||||
pub struct FacetLevelValueI64Codec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FacetLevelValueI64Codec {
|
||||
type DItem = (u8, u8, i64, i64);
|
||||
type DItem = (FieldId, u8, i64, i64);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id, bytes) = bytes.split_first()?;
|
||||
@ -24,7 +25,7 @@ impl<'a> heed::BytesDecode<'a> for FacetLevelValueI64Codec {
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for FacetLevelValueI64Codec {
|
||||
type EItem = (u8, u8, i64, i64);
|
||||
type EItem = (FieldId, u8, i64, i64);
|
||||
|
||||
fn bytes_encode((field_id, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let left = i64_into_bytes(*left);
|
||||
|
@ -1,10 +1,12 @@
|
||||
use std::borrow::Cow;
|
||||
use std::str;
|
||||
|
||||
use crate::FieldId;
|
||||
|
||||
pub struct FacetValueStringCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec {
|
||||
type DItem = (u8, &'a str);
|
||||
type DItem = (FieldId, &'a str);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id, bytes) = bytes.split_first()?;
|
||||
@ -14,7 +16,7 @@ impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec {
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FacetValueStringCodec {
|
||||
type EItem = (u8, &'a str);
|
||||
type EItem = (FieldId, &'a str);
|
||||
|
||||
fn bytes_encode((field_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(value.len() + 1);
|
||||
|
36
src/heed_codec/facet/field_doc_id_facet_f64_codec.rs
Normal file
36
src/heed_codec/facet/field_doc_id_facet_f64_codec.rs
Normal file
@ -0,0 +1,36 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::{FieldId, DocumentId};
|
||||
use crate::facet::value_encoding::f64_into_bytes;
|
||||
|
||||
pub struct FieldDocIdFacetF64Codec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetF64Codec {
|
||||
type DItem = (FieldId, DocumentId, f64);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id, bytes) = bytes.split_first()?;
|
||||
|
||||
let (document_id_bytes, bytes) = bytes.split_at(4);
|
||||
let document_id = document_id_bytes.try_into().map(u32::from_be_bytes).ok()?;
|
||||
|
||||
let value = bytes[8..16].try_into().map(f64::from_be_bytes).ok()?;
|
||||
|
||||
Some((*field_id, document_id, value))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetF64Codec {
|
||||
type EItem = (FieldId, DocumentId, f64);
|
||||
|
||||
fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(1 + 4 + 8 + 8);
|
||||
bytes.push(*field_id);
|
||||
bytes.extend_from_slice(&document_id.to_be_bytes());
|
||||
let value_bytes = f64_into_bytes(*value)?;
|
||||
bytes.extend_from_slice(&value_bytes);
|
||||
bytes.extend_from_slice(&value.to_be_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
34
src/heed_codec/facet/field_doc_id_facet_i64_codec.rs
Normal file
34
src/heed_codec/facet/field_doc_id_facet_i64_codec.rs
Normal file
@ -0,0 +1,34 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::facet::value_encoding::{i64_into_bytes, i64_from_bytes};
|
||||
use crate::{FieldId, DocumentId};
|
||||
|
||||
pub struct FieldDocIdFacetI64Codec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetI64Codec {
|
||||
type DItem = (FieldId, DocumentId, i64);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id, bytes) = bytes.split_first()?;
|
||||
|
||||
let (document_id_bytes, bytes) = bytes.split_at(4);
|
||||
let document_id = document_id_bytes.try_into().map(u32::from_be_bytes).ok()?;
|
||||
|
||||
let value = bytes[..8].try_into().map(i64_from_bytes).ok()?;
|
||||
|
||||
Some((*field_id, document_id, value))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetI64Codec {
|
||||
type EItem = (FieldId, DocumentId, i64);
|
||||
|
||||
fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(1 + 4 + 8);
|
||||
bytes.push(*field_id);
|
||||
bytes.extend_from_slice(&document_id.to_be_bytes());
|
||||
bytes.extend_from_slice(&i64_into_bytes(*value));
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
31
src/heed_codec/facet/field_doc_id_facet_string_codec.rs
Normal file
31
src/heed_codec/facet/field_doc_id_facet_string_codec.rs
Normal file
@ -0,0 +1,31 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::str;
|
||||
|
||||
use crate::{FieldId, DocumentId};
|
||||
|
||||
pub struct FieldDocIdFacetStringCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetStringCodec {
|
||||
type DItem = (FieldId, DocumentId, &'a str);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id, bytes) = bytes.split_first()?;
|
||||
let (document_id_bytes, bytes) = bytes.split_at(4);
|
||||
let document_id = document_id_bytes.try_into().map(u32::from_be_bytes).ok()?;
|
||||
let value = str::from_utf8(bytes).ok()?;
|
||||
Some((*field_id, document_id, value))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetStringCodec {
|
||||
type EItem = (FieldId, DocumentId, &'a str);
|
||||
|
||||
fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(1 + 4 + value.len());
|
||||
bytes.push(*field_id);
|
||||
bytes.extend_from_slice(&document_id.to_be_bytes());
|
||||
bytes.extend_from_slice(value.as_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
@ -1,7 +1,13 @@
|
||||
mod facet_level_value_f64_codec;
|
||||
mod facet_level_value_i64_codec;
|
||||
mod facet_value_string_codec;
|
||||
mod field_doc_id_facet_f64_codec;
|
||||
mod field_doc_id_facet_i64_codec;
|
||||
mod field_doc_id_facet_string_codec;
|
||||
|
||||
pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec;
|
||||
pub use self::facet_level_value_i64_codec::FacetLevelValueI64Codec;
|
||||
pub use self::facet_value_string_codec::FacetValueStringCodec;
|
||||
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
|
||||
pub use self::field_doc_id_facet_i64_codec::FieldDocIdFacetI64Codec;
|
||||
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
|
||||
|
52
src/index.rs
52
src/index.rs
@ -9,13 +9,14 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use crate::facet::FacetType;
|
||||
use crate::fields_ids_map::FieldsIdsMap;
|
||||
use crate::Search;
|
||||
use crate::{BEU32, DocumentId, ExternalDocumentsIds};
|
||||
use crate::{default_criteria, Criterion, Search};
|
||||
use crate::{BEU32, DocumentId, FieldId, ExternalDocumentsIds};
|
||||
use crate::{
|
||||
RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec, ObkvCodec,
|
||||
BoRoaringBitmapCodec, CboRoaringBitmapCodec,
|
||||
};
|
||||
|
||||
pub const CRITERIA_KEY: &str = "criteria";
|
||||
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
|
||||
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
|
||||
pub const FACETED_DOCUMENTS_IDS_PREFIX: &str = "faceted-documents-ids";
|
||||
@ -41,13 +42,15 @@ pub struct Index {
|
||||
pub word_pair_proximity_docids: Database<StrStrU8Codec, CboRoaringBitmapCodec>,
|
||||
/// Maps the facet field id and the globally ordered value with the docids that corresponds to it.
|
||||
pub facet_field_id_value_docids: Database<ByteSlice, CboRoaringBitmapCodec>,
|
||||
/// Maps the document id, the facet field id and the globally ordered value.
|
||||
pub field_id_docid_facet_values: Database<ByteSlice, Unit>,
|
||||
/// Maps the document id to the document as an obkv store.
|
||||
pub documents: Database<OwnedType<BEU32>, ObkvCodec>,
|
||||
}
|
||||
|
||||
impl Index {
|
||||
pub fn new<P: AsRef<Path>>(mut options: heed::EnvOpenOptions, path: P) -> anyhow::Result<Index> {
|
||||
options.max_dbs(6);
|
||||
options.max_dbs(7);
|
||||
|
||||
let env = options.open(path)?;
|
||||
let main = env.create_poly_database(Some("main"))?;
|
||||
@ -55,6 +58,7 @@ impl Index {
|
||||
let docid_word_positions = env.create_database(Some("docid-word-positions"))?;
|
||||
let word_pair_proximity_docids = env.create_database(Some("word-pair-proximity-docids"))?;
|
||||
let facet_field_id_value_docids = env.create_database(Some("facet-field-id-value-docids"))?;
|
||||
let field_id_docid_facet_values = env.create_database(Some("field-id-docid-facet-values"))?;
|
||||
let documents = env.create_database(Some("documents"))?;
|
||||
|
||||
Ok(Index {
|
||||
@ -64,6 +68,7 @@ impl Index {
|
||||
docid_word_positions,
|
||||
word_pair_proximity_docids,
|
||||
facet_field_id_value_docids,
|
||||
field_id_docid_facet_values,
|
||||
documents,
|
||||
})
|
||||
}
|
||||
@ -107,8 +112,8 @@ impl Index {
|
||||
/* primary key */
|
||||
|
||||
/// Writes the documents primary key, this is the field name that is used to store the id.
|
||||
pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: u8) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, OwnedType<u8>>(wtxn, PRIMARY_KEY_KEY, &primary_key)
|
||||
pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: FieldId) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, OwnedType<FieldId>>(wtxn, PRIMARY_KEY_KEY, &primary_key)
|
||||
}
|
||||
|
||||
/// Deletes the primary key of the documents, this can be done to reset indexes settings.
|
||||
@ -117,8 +122,8 @@ impl Index {
|
||||
}
|
||||
|
||||
/// Returns the documents primary key, `None` if it hasn't been defined.
|
||||
pub fn primary_key(&self, rtxn: &RoTxn) -> heed::Result<Option<u8>> {
|
||||
self.main.get::<_, Str, OwnedType<u8>>(rtxn, PRIMARY_KEY_KEY)
|
||||
pub fn primary_key(&self, rtxn: &RoTxn) -> heed::Result<Option<FieldId>> {
|
||||
self.main.get::<_, Str, OwnedType<FieldId>>(rtxn, PRIMARY_KEY_KEY)
|
||||
}
|
||||
|
||||
/* external documents ids */
|
||||
@ -172,7 +177,7 @@ impl Index {
|
||||
|
||||
/// Writes the fields ids that must be displayed in the defined order.
|
||||
/// There must be not be any duplicate field id.
|
||||
pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[u8]) -> heed::Result<()> {
|
||||
pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[FieldId]) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, ByteSlice>(wtxn, DISPLAYED_FIELDS_KEY, fields)
|
||||
}
|
||||
|
||||
@ -184,14 +189,14 @@ impl Index {
|
||||
|
||||
/// Returns the displayed fields ids in the order they must be returned. If it returns
|
||||
/// `None` it means that all the attributes are displayed in the order of the `FieldsIdsMap`.
|
||||
pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [u8]>> {
|
||||
pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [FieldId]>> {
|
||||
self.main.get::<_, Str, ByteSlice>(rtxn, DISPLAYED_FIELDS_KEY)
|
||||
}
|
||||
|
||||
/* searchable fields */
|
||||
|
||||
/// Writes the searchable fields, when this list is specified, only these are indexed.
|
||||
pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[u8]) -> heed::Result<()> {
|
||||
pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[FieldId]) -> heed::Result<()> {
|
||||
assert!(fields.windows(2).all(|win| win[0] < win[1])); // is sorted
|
||||
self.main.put::<_, Str, ByteSlice>(wtxn, SEARCHABLE_FIELDS_KEY, fields)
|
||||
}
|
||||
@ -203,7 +208,7 @@ impl Index {
|
||||
|
||||
/// Returns the searchable fields ids, those are the fields that are indexed,
|
||||
/// if the searchable fields aren't there it means that **all** the fields are indexed.
|
||||
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [u8]>> {
|
||||
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [FieldId]>> {
|
||||
self.main.get::<_, Str, ByteSlice>(rtxn, SEARCHABLE_FIELDS_KEY)
|
||||
}
|
||||
|
||||
@ -211,7 +216,7 @@ impl Index {
|
||||
|
||||
/// Writes the facet fields ids associated with their facet type or `None` if
|
||||
/// the facet type is currently unknown.
|
||||
pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<u8, FacetType>) -> heed::Result<()> {
|
||||
pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<FieldId, FacetType>) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields_types)
|
||||
}
|
||||
|
||||
@ -221,14 +226,14 @@ impl Index {
|
||||
}
|
||||
|
||||
/// Returns the facet fields ids associated with their facet type.
|
||||
pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result<HashMap<u8, FacetType>> {
|
||||
pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result<HashMap<FieldId, FacetType>> {
|
||||
Ok(self.main.get::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
/* faceted documents ids */
|
||||
|
||||
/// Writes the documents ids that are faceted under this field id.
|
||||
pub fn put_faceted_documents_ids(&self, wtxn: &mut RwTxn, field_id: u8, docids: &RoaringBitmap) -> heed::Result<()> {
|
||||
pub fn put_faceted_documents_ids(&self, wtxn: &mut RwTxn, field_id: FieldId, docids: &RoaringBitmap) -> heed::Result<()> {
|
||||
let mut buffer = [0u8; FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
|
||||
buffer[..FACETED_DOCUMENTS_IDS_PREFIX.len()].clone_from_slice(FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
|
||||
*buffer.last_mut().unwrap() = field_id;
|
||||
@ -236,7 +241,7 @@ impl Index {
|
||||
}
|
||||
|
||||
/// Retrieve all the documents ids that faceted under this field id.
|
||||
pub fn faceted_documents_ids(&self, rtxn: &RoTxn, field_id: u8) -> heed::Result<RoaringBitmap> {
|
||||
pub fn faceted_documents_ids(&self, rtxn: &RoTxn, field_id: FieldId) -> heed::Result<RoaringBitmap> {
|
||||
let mut buffer = [0u8; FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
|
||||
buffer[..FACETED_DOCUMENTS_IDS_PREFIX.len()].clone_from_slice(FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
|
||||
*buffer.last_mut().unwrap() = field_id;
|
||||
@ -246,6 +251,23 @@ impl Index {
|
||||
}
|
||||
}
|
||||
|
||||
/* criteria */
|
||||
|
||||
pub fn put_criteria(&self, wtxn: &mut RwTxn, criteria: &[Criterion]) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, CRITERIA_KEY, &criteria)
|
||||
}
|
||||
|
||||
pub fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, CRITERIA_KEY)
|
||||
}
|
||||
|
||||
pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result<Vec<Criterion>> {
|
||||
match self.main.get::<_, Str, SerdeJson<Vec<Criterion>>>(rtxn, CRITERIA_KEY)? {
|
||||
Some(criteria) => Ok(criteria),
|
||||
None => Ok(default_criteria()),
|
||||
}
|
||||
}
|
||||
|
||||
/* words fst */
|
||||
|
||||
/// Writes the FST which is the words dictionnary of the engine.
|
||||
|
@ -40,15 +40,16 @@ pub type SmallVec16<T> = smallvec::SmallVec<[T; 16]>;
|
||||
pub type SmallVec8<T> = smallvec::SmallVec<[T; 8]>;
|
||||
pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>;
|
||||
pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
|
||||
pub type DocumentId = u32;
|
||||
pub type Attribute = u32;
|
||||
pub type DocumentId = u32;
|
||||
pub type FieldId = u8;
|
||||
pub type Position = u32;
|
||||
|
||||
type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> anyhow::Result<Vec<u8>>;
|
||||
|
||||
/// Transform a raw obkv store into a JSON Object.
|
||||
pub fn obkv_to_json(
|
||||
displayed_fields: &[u8],
|
||||
displayed_fields: &[FieldId],
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
obkv: obkv::KvReader,
|
||||
) -> anyhow::Result<Map<String, Value>>
|
||||
|
643
src/search/facet/facet_condition.rs
Normal file
643
src/search/facet/facet_condition.rs
Normal file
@ -0,0 +1,643 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Debug;
|
||||
use std::ops::Bound::{self, Included, Excluded};
|
||||
use std::str::FromStr;
|
||||
|
||||
use heed::types::{ByteSlice, DecodeIgnore};
|
||||
use log::debug;
|
||||
use num_traits::Bounded;
|
||||
use pest::error::{Error as PestError, ErrorVariant};
|
||||
use pest::iterators::{Pair, Pairs};
|
||||
use pest::Parser;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::FacetValueStringCodec;
|
||||
use crate::heed_codec::facet::{FacetLevelValueI64Codec, FacetLevelValueF64Codec};
|
||||
use crate::{Index, FieldId, FieldsIdsMap, CboRoaringBitmapCodec};
|
||||
|
||||
use super::FacetRange;
|
||||
use super::parser::Rule;
|
||||
use super::parser::{PREC_CLIMBER, FilterParser};
|
||||
|
||||
use self::FacetCondition::*;
|
||||
use self::FacetNumberOperator::*;
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||
pub enum FacetNumberOperator<T> {
|
||||
GreaterThan(T),
|
||||
GreaterThanOrEqual(T),
|
||||
Equal(T),
|
||||
NotEqual(T),
|
||||
LowerThan(T),
|
||||
LowerThanOrEqual(T),
|
||||
Between(T, T),
|
||||
}
|
||||
|
||||
impl<T> FacetNumberOperator<T> {
|
||||
/// This method can return two operations in case it must express
|
||||
/// an OR operation for the between case (i.e. `TO`).
|
||||
fn negate(self) -> (Self, Option<Self>) {
|
||||
match self {
|
||||
GreaterThan(x) => (LowerThanOrEqual(x), None),
|
||||
GreaterThanOrEqual(x) => (LowerThan(x), None),
|
||||
Equal(x) => (NotEqual(x), None),
|
||||
NotEqual(x) => (Equal(x), None),
|
||||
LowerThan(x) => (GreaterThanOrEqual(x), None),
|
||||
LowerThanOrEqual(x) => (GreaterThan(x), None),
|
||||
Between(x, y) => (LowerThan(x), Some(GreaterThan(y))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum FacetStringOperator {
|
||||
Equal(String),
|
||||
NotEqual(String),
|
||||
}
|
||||
|
||||
impl FacetStringOperator {
|
||||
fn equal(s: &str) -> Self {
|
||||
FacetStringOperator::Equal(s.to_lowercase())
|
||||
}
|
||||
|
||||
fn not_equal(s: &str) -> Self {
|
||||
FacetStringOperator::equal(s).negate()
|
||||
}
|
||||
|
||||
fn negate(self) -> Self {
|
||||
match self {
|
||||
FacetStringOperator::Equal(x) => FacetStringOperator::NotEqual(x),
|
||||
FacetStringOperator::NotEqual(x) => FacetStringOperator::Equal(x),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum FacetCondition {
|
||||
OperatorI64(FieldId, FacetNumberOperator<i64>),
|
||||
OperatorF64(FieldId, FacetNumberOperator<f64>),
|
||||
OperatorString(FieldId, FacetStringOperator),
|
||||
Or(Box<Self>, Box<Self>),
|
||||
And(Box<Self>, Box<Self>),
|
||||
}
|
||||
|
||||
fn get_field_id_facet_type<'a>(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashMap<FieldId, FacetType>,
|
||||
items: &mut Pairs<'a, Rule>,
|
||||
) -> Result<(FieldId, FacetType), PestError<Rule>>
|
||||
{
|
||||
// lexing ensures that we at least have a key
|
||||
let key = items.next().unwrap();
|
||||
let field_id = fields_ids_map
|
||||
.id(key.as_str())
|
||||
.ok_or_else(|| {
|
||||
PestError::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: format!(
|
||||
"attribute `{}` not found, available attributes are: {}",
|
||||
key.as_str(),
|
||||
fields_ids_map.iter().map(|(_, n)| n).collect::<Vec<_>>().join(", ")
|
||||
),
|
||||
},
|
||||
key.as_span(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let facet_type = faceted_fields
|
||||
.get(&field_id)
|
||||
.copied()
|
||||
.ok_or_else(|| {
|
||||
PestError::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: format!(
|
||||
"attribute `{}` is not faceted, available faceted attributes are: {}",
|
||||
key.as_str(),
|
||||
faceted_fields.keys().flat_map(|id| fields_ids_map.name(*id)).collect::<Vec<_>>().join(", ")
|
||||
),
|
||||
},
|
||||
key.as_span(),
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok((field_id, facet_type))
|
||||
}
|
||||
|
||||
fn pest_parse<T>(pair: Pair<Rule>) -> Result<T, pest::error::Error<Rule>>
|
||||
where T: FromStr,
|
||||
T::Err: ToString,
|
||||
{
|
||||
match pair.as_str().parse() {
|
||||
Ok(value) => Ok(value),
|
||||
Err(e) => {
|
||||
Err(PestError::<Rule>::new_from_span(
|
||||
ErrorVariant::CustomError { message: e.to_string() },
|
||||
pair.as_span(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FacetCondition {
|
||||
pub fn from_str(
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
expression: &str,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
{
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let faceted_fields = index.faceted_fields(rtxn)?;
|
||||
let lexed = FilterParser::parse(Rule::prgm, expression)?;
|
||||
FacetCondition::from_pairs(&fields_ids_map, &faceted_fields, lexed)
|
||||
}
|
||||
|
||||
fn from_pairs(
|
||||
fim: &FieldsIdsMap,
|
||||
ff: &HashMap<FieldId, FacetType>,
|
||||
expression: Pairs<Rule>,
|
||||
) -> anyhow::Result<Self>
|
||||
{
|
||||
PREC_CLIMBER.climb(
|
||||
expression,
|
||||
|pair: Pair<Rule>| match pair.as_rule() {
|
||||
Rule::greater => Ok(Self::greater_than(fim, ff, pair)?),
|
||||
Rule::geq => Ok(Self::greater_than_or_equal(fim, ff, pair)?),
|
||||
Rule::eq => Ok(Self::equal(fim, ff, pair)?),
|
||||
Rule::neq => Ok(Self::equal(fim, ff, pair)?.negate()),
|
||||
Rule::leq => Ok(Self::lower_than_or_equal(fim, ff, pair)?),
|
||||
Rule::less => Ok(Self::lower_than(fim, ff, pair)?),
|
||||
Rule::between => Ok(Self::between(fim, ff, pair)?),
|
||||
Rule::not => Ok(Self::from_pairs(fim, ff, pair.into_inner())?.negate()),
|
||||
Rule::prgm => Self::from_pairs(fim, ff, pair.into_inner()),
|
||||
Rule::term => Self::from_pairs(fim, ff, pair.into_inner()),
|
||||
_ => unreachable!(),
|
||||
},
|
||||
|lhs: anyhow::Result<Self>, op: Pair<Rule>, rhs: anyhow::Result<Self>| {
|
||||
match op.as_rule() {
|
||||
Rule::or => Ok(Or(Box::new(lhs?), Box::new(rhs?))),
|
||||
Rule::and => Ok(And(Box::new(lhs?), Box::new(rhs?))),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
fn negate(self) -> FacetCondition {
|
||||
match self {
|
||||
OperatorI64(fid, op) => match op.negate() {
|
||||
(op, None) => OperatorI64(fid, op),
|
||||
(a, Some(b)) => Or(Box::new(OperatorI64(fid, a)), Box::new(OperatorI64(fid, b))),
|
||||
},
|
||||
OperatorF64(fid, op) => match op.negate() {
|
||||
(op, None) => OperatorF64(fid, op),
|
||||
(a, Some(b)) => Or(Box::new(OperatorF64(fid, a)), Box::new(OperatorF64(fid, b))),
|
||||
},
|
||||
OperatorString(fid, op) => OperatorString(fid, op.negate()),
|
||||
Or(a, b) => And(Box::new(a.negate()), Box::new(b.negate())),
|
||||
And(a, b) => Or(Box::new(a.negate()), Box::new(b.negate())),
|
||||
}
|
||||
}
|
||||
|
||||
fn between(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashMap<FieldId, FacetType>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
{
|
||||
let item_span = item.as_span();
|
||||
let mut items = item.into_inner();
|
||||
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let lvalue = items.next().unwrap();
|
||||
let rvalue = items.next().unwrap();
|
||||
match ftype {
|
||||
FacetType::Integer => {
|
||||
let lvalue = pest_parse(lvalue)?;
|
||||
let rvalue = pest_parse(rvalue)?;
|
||||
Ok(OperatorI64(fid, Between(lvalue, rvalue)))
|
||||
},
|
||||
FacetType::Float => {
|
||||
let lvalue = pest_parse(lvalue)?;
|
||||
let rvalue = pest_parse(rvalue)?;
|
||||
Ok(OperatorF64(fid, Between(lvalue, rvalue)))
|
||||
},
|
||||
FacetType::String => {
|
||||
Err(PestError::<Rule>::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: "invalid operator on a faceted string".to_string(),
|
||||
},
|
||||
item_span,
|
||||
).into())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn equal(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashMap<FieldId, FacetType>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
{
|
||||
let mut items = item.into_inner();
|
||||
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let value = items.next().unwrap();
|
||||
match ftype {
|
||||
FacetType::Integer => Ok(OperatorI64(fid, Equal(pest_parse(value)?))),
|
||||
FacetType::Float => Ok(OperatorF64(fid, Equal(pest_parse(value)?))),
|
||||
FacetType::String => Ok(OperatorString(fid, FacetStringOperator::equal(value.as_str()))),
|
||||
}
|
||||
}
|
||||
|
||||
fn greater_than(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashMap<FieldId, FacetType>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
{
|
||||
let item_span = item.as_span();
|
||||
let mut items = item.into_inner();
|
||||
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let value = items.next().unwrap();
|
||||
match ftype {
|
||||
FacetType::Integer => Ok(OperatorI64(fid, GreaterThan(pest_parse(value)?))),
|
||||
FacetType::Float => Ok(OperatorF64(fid, GreaterThan(pest_parse(value)?))),
|
||||
FacetType::String => {
|
||||
Err(PestError::<Rule>::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: "invalid operator on a faceted string".to_string(),
|
||||
},
|
||||
item_span,
|
||||
).into())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn greater_than_or_equal(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashMap<FieldId, FacetType>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
{
|
||||
let item_span = item.as_span();
|
||||
let mut items = item.into_inner();
|
||||
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let value = items.next().unwrap();
|
||||
match ftype {
|
||||
FacetType::Integer => Ok(OperatorI64(fid, GreaterThanOrEqual(pest_parse(value)?))),
|
||||
FacetType::Float => Ok(OperatorF64(fid, GreaterThanOrEqual(pest_parse(value)?))),
|
||||
FacetType::String => {
|
||||
Err(PestError::<Rule>::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: "invalid operator on a faceted string".to_string(),
|
||||
},
|
||||
item_span,
|
||||
).into())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn lower_than(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashMap<FieldId, FacetType>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
{
|
||||
let item_span = item.as_span();
|
||||
let mut items = item.into_inner();
|
||||
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let value = items.next().unwrap();
|
||||
match ftype {
|
||||
FacetType::Integer => Ok(OperatorI64(fid, LowerThan(pest_parse(value)?))),
|
||||
FacetType::Float => Ok(OperatorF64(fid, LowerThan(pest_parse(value)?))),
|
||||
FacetType::String => {
|
||||
Err(PestError::<Rule>::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: "invalid operator on a faceted string".to_string(),
|
||||
},
|
||||
item_span,
|
||||
).into())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn lower_than_or_equal(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashMap<FieldId, FacetType>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
{
|
||||
let item_span = item.as_span();
|
||||
let mut items = item.into_inner();
|
||||
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let value = items.next().unwrap();
|
||||
match ftype {
|
||||
FacetType::Integer => Ok(OperatorI64(fid, LowerThanOrEqual(pest_parse(value)?))),
|
||||
FacetType::Float => Ok(OperatorF64(fid, LowerThanOrEqual(pest_parse(value)?))),
|
||||
FacetType::String => {
|
||||
Err(PestError::<Rule>::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: "invalid operator on a faceted string".to_string(),
|
||||
},
|
||||
item_span,
|
||||
).into())
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FacetCondition {
|
||||
/// Aggregates the documents ids that are part of the specified range automatically
|
||||
/// going deeper through the levels.
|
||||
fn explore_facet_levels<'t, T: 't, KC>(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
|
||||
field_id: FieldId,
|
||||
level: u8,
|
||||
left: Bound<T>,
|
||||
right: Bound<T>,
|
||||
output: &mut RoaringBitmap,
|
||||
) -> anyhow::Result<()>
|
||||
where
|
||||
T: Copy + PartialEq + PartialOrd + Bounded + Debug,
|
||||
KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>,
|
||||
KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>,
|
||||
{
|
||||
match (left, right) {
|
||||
// If the request is an exact value we must go directly to the deepest level.
|
||||
(Included(l), Included(r)) if l == r && level > 0 => {
|
||||
return Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, 0, left, right, output);
|
||||
},
|
||||
// lower TO upper when lower > upper must return no result
|
||||
(Included(l), Included(r)) if l > r => return Ok(()),
|
||||
(Included(l), Excluded(r)) if l >= r => return Ok(()),
|
||||
(Excluded(l), Excluded(r)) if l >= r => return Ok(()),
|
||||
(Excluded(l), Included(r)) if l >= r => return Ok(()),
|
||||
(_, _) => (),
|
||||
}
|
||||
|
||||
let mut left_found = None;
|
||||
let mut right_found = None;
|
||||
|
||||
// We must create a custom iterator to be able to iterate over the
|
||||
// requested range as the range iterator cannot express some conditions.
|
||||
let iter = FacetRange::new(rtxn, db.remap_key_type::<KC>(), field_id, level, left, right)?;
|
||||
|
||||
debug!("Iterating between {:?} and {:?} (level {})", left, right, level);
|
||||
|
||||
for (i, result) in iter.enumerate() {
|
||||
let ((_fid, level, l, r), docids) = result?;
|
||||
debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len());
|
||||
output.union_with(&docids);
|
||||
// We save the leftest and rightest bounds we actually found at this level.
|
||||
if i == 0 { left_found = Some(l); }
|
||||
right_found = Some(r);
|
||||
}
|
||||
|
||||
// Can we go deeper?
|
||||
let deeper_level = match level.checked_sub(1) {
|
||||
Some(level) => level,
|
||||
None => return Ok(()),
|
||||
};
|
||||
|
||||
// We must refine the left and right bounds of this range by retrieving the
|
||||
// missing part in a deeper level.
|
||||
match left_found.zip(right_found) {
|
||||
Some((left_found, right_found)) => {
|
||||
// If the bound is satisfied we avoid calling this function again.
|
||||
if !matches!(left, Included(l) if l == left_found) {
|
||||
let sub_right = Excluded(left_found);
|
||||
debug!("calling left with {:?} to {:?} (level {})", left, sub_right, deeper_level);
|
||||
Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, deeper_level, left, sub_right, output)?;
|
||||
}
|
||||
if !matches!(right, Included(r) if r == right_found) {
|
||||
let sub_left = Excluded(right_found);
|
||||
debug!("calling right with {:?} to {:?} (level {})", sub_left, right, deeper_level);
|
||||
Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, deeper_level, sub_left, right, output)?;
|
||||
}
|
||||
},
|
||||
None => {
|
||||
// If we found nothing at this level it means that we must find
|
||||
// the same bounds but at a deeper, more precise level.
|
||||
Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, deeper_level, left, right, output)?;
|
||||
},
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate_number_operator<'t, T: 't, KC>(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
index: &Index,
|
||||
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
|
||||
field_id: FieldId,
|
||||
operator: FacetNumberOperator<T>,
|
||||
) -> anyhow::Result<RoaringBitmap>
|
||||
where
|
||||
T: Copy + PartialEq + PartialOrd + Bounded + Debug,
|
||||
KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>,
|
||||
KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>,
|
||||
{
|
||||
// Make sure we always bound the ranges with the field id and the level,
|
||||
// as the facets values are all in the same database and prefixed by the
|
||||
// field id and the level.
|
||||
let (left, right) = match operator {
|
||||
GreaterThan(val) => (Excluded(val), Included(T::max_value())),
|
||||
GreaterThanOrEqual(val) => (Included(val), Included(T::max_value())),
|
||||
Equal(val) => (Included(val), Included(val)),
|
||||
NotEqual(val) => {
|
||||
let all_documents_ids = index.faceted_documents_ids(rtxn, field_id)?;
|
||||
let docids = Self::evaluate_number_operator::<T, KC>(rtxn, index, db, field_id, Equal(val))?;
|
||||
return Ok(all_documents_ids - docids);
|
||||
},
|
||||
LowerThan(val) => (Included(T::min_value()), Excluded(val)),
|
||||
LowerThanOrEqual(val) => (Included(T::min_value()), Included(val)),
|
||||
Between(left, right) => (Included(left), Included(right)),
|
||||
};
|
||||
|
||||
// Ask for the biggest value that can exist for this specific field, if it exists
|
||||
// that's fine if it don't, the value just before will be returned instead.
|
||||
let biggest_level = db
|
||||
.remap_types::<KC, DecodeIgnore>()
|
||||
.get_lower_than_or_equal_to(rtxn, &(field_id, u8::MAX, T::max_value(), T::max_value()))?
|
||||
.and_then(|((id, level, _, _), _)| if id == field_id { Some(level) } else { None });
|
||||
|
||||
match biggest_level {
|
||||
Some(level) => {
|
||||
let mut output = RoaringBitmap::new();
|
||||
Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, level, left, right, &mut output)?;
|
||||
Ok(output)
|
||||
},
|
||||
None => Ok(RoaringBitmap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
fn evaluate_string_operator(
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
db: heed::Database<FacetValueStringCodec, CboRoaringBitmapCodec>,
|
||||
field_id: FieldId,
|
||||
operator: &FacetStringOperator,
|
||||
) -> anyhow::Result<RoaringBitmap>
|
||||
{
|
||||
match operator {
|
||||
FacetStringOperator::Equal(string) => {
|
||||
match db.get(rtxn, &(field_id, string))? {
|
||||
Some(docids) => Ok(docids),
|
||||
None => Ok(RoaringBitmap::new())
|
||||
}
|
||||
},
|
||||
FacetStringOperator::NotEqual(string) => {
|
||||
let all_documents_ids = index.faceted_documents_ids(rtxn, field_id)?;
|
||||
let op = FacetStringOperator::Equal(string.clone());
|
||||
let docids = Self::evaluate_string_operator(rtxn, index, db, field_id, &op)?;
|
||||
Ok(all_documents_ids - docids)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn evaluate(
|
||||
&self,
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
) -> anyhow::Result<RoaringBitmap>
|
||||
{
|
||||
let db = index.facet_field_id_value_docids;
|
||||
match self {
|
||||
OperatorI64(fid, op) => {
|
||||
Self::evaluate_number_operator::<i64, FacetLevelValueI64Codec>(rtxn, index, db, *fid, *op)
|
||||
},
|
||||
OperatorF64(fid, op) => {
|
||||
Self::evaluate_number_operator::<f64, FacetLevelValueF64Codec>(rtxn, index, db, *fid, *op)
|
||||
},
|
||||
OperatorString(fid, op) => {
|
||||
let db = db.remap_key_type::<FacetValueStringCodec>();
|
||||
Self::evaluate_string_operator(rtxn, index, db, *fid, op)
|
||||
},
|
||||
Or(lhs, rhs) => {
|
||||
let lhs = lhs.evaluate(rtxn, index)?;
|
||||
let rhs = rhs.evaluate(rtxn, index)?;
|
||||
Ok(lhs | rhs)
|
||||
},
|
||||
And(lhs, rhs) => {
|
||||
let lhs = lhs.evaluate(rtxn, index)?;
|
||||
let rhs = rhs.evaluate(rtxn, index)?;
|
||||
Ok(lhs & rhs)
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::update::Settings;
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::hashmap;
|
||||
|
||||
#[test]
|
||||
fn string() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Set the faceted fields to be the channel.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index);
|
||||
builder.set_faceted_fields(hashmap!{ "channel".into() => "string".into() });
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Test that the facet condition is correctly generated.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "channel = ponce").unwrap();
|
||||
let expected = OperatorString(1, FacetStringOperator::equal("Ponce"));
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "channel != ponce").unwrap();
|
||||
let expected = OperatorString(1, FacetStringOperator::not_equal("ponce"));
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap();
|
||||
let expected = OperatorString(1, FacetStringOperator::not_equal("ponce"));
|
||||
assert_eq!(condition, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn i64() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Set the faceted fields to be the channel.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index);
|
||||
builder.set_faceted_fields(hashmap!{ "timestamp".into() => "integer".into() });
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Test that the facet condition is correctly generated.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap();
|
||||
let expected = OperatorI64(1, Between(22, 44));
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap();
|
||||
let expected = Or(
|
||||
Box::new(OperatorI64(1, LowerThan(22))),
|
||||
Box::new(OperatorI64(1, GreaterThan(44))),
|
||||
);
|
||||
assert_eq!(condition, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parentheses() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Set the faceted fields to be the channel.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index);
|
||||
builder.set_searchable_fields(vec!["channel".into(), "timestamp".into()]); // to keep the fields order
|
||||
builder.set_faceted_fields(hashmap!{
|
||||
"channel".into() => "string".into(),
|
||||
"timestamp".into() => "integer".into(),
|
||||
});
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Test that the facet condition is correctly generated.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let condition = FacetCondition::from_str(
|
||||
&rtxn, &index,
|
||||
"channel = gotaga OR (timestamp 22 TO 44 AND channel != ponce)",
|
||||
).unwrap();
|
||||
let expected = Or(
|
||||
Box::new(OperatorString(0, FacetStringOperator::equal("gotaga"))),
|
||||
Box::new(And(
|
||||
Box::new(OperatorI64(1, Between(22, 44))),
|
||||
Box::new(OperatorString(0, FacetStringOperator::not_equal("ponce"))),
|
||||
))
|
||||
);
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FacetCondition::from_str(
|
||||
&rtxn, &index,
|
||||
"channel = gotaga OR NOT (timestamp 22 TO 44 AND channel != ponce)",
|
||||
).unwrap();
|
||||
let expected = Or(
|
||||
Box::new(OperatorString(0, FacetStringOperator::equal("gotaga"))),
|
||||
Box::new(Or(
|
||||
Box::new(Or(
|
||||
Box::new(OperatorI64(1, LowerThan(22))),
|
||||
Box::new(OperatorI64(1, GreaterThan(44))),
|
||||
)),
|
||||
Box::new(OperatorString(0, FacetStringOperator::equal("ponce"))),
|
||||
)),
|
||||
);
|
||||
assert_eq!(condition, expected);
|
||||
}
|
||||
}
|
@ -1,661 +1,255 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Debug;
|
||||
use std::ops::Bound::{self, Unbounded, Included, Excluded};
|
||||
use std::str::FromStr;
|
||||
use std::ops::Bound::{self, Included, Excluded, Unbounded};
|
||||
|
||||
use heed::types::{ByteSlice, DecodeIgnore};
|
||||
use either::Either::{self, Left, Right};
|
||||
use heed::types::{DecodeIgnore, ByteSlice};
|
||||
use heed::{BytesEncode, BytesDecode};
|
||||
use heed::{Database, RoRange, RoRevRange, LazyDecode};
|
||||
use log::debug;
|
||||
use num_traits::Bounded;
|
||||
use parser::{PREC_CLIMBER, FilterParser};
|
||||
use pest::error::{Error as PestError, ErrorVariant};
|
||||
use pest::iterators::{Pair, Pairs};
|
||||
use pest::Parser;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::FacetValueStringCodec;
|
||||
use crate::heed_codec::facet::{FacetLevelValueI64Codec, FacetLevelValueF64Codec};
|
||||
use crate::{Index, FieldsIdsMap, CboRoaringBitmapCodec};
|
||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::{Index, FieldId};
|
||||
|
||||
use self::FacetCondition::*;
|
||||
use self::FacetNumberOperator::*;
|
||||
use self::parser::Rule;
|
||||
pub use self::facet_condition::{FacetCondition, FacetNumberOperator, FacetStringOperator};
|
||||
|
||||
mod facet_condition;
|
||||
mod parser;
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||
pub enum FacetNumberOperator<T> {
|
||||
GreaterThan(T),
|
||||
GreaterThanOrEqual(T),
|
||||
Equal(T),
|
||||
NotEqual(T),
|
||||
LowerThan(T),
|
||||
LowerThanOrEqual(T),
|
||||
Between(T, T),
|
||||
struct FacetRange<'t, T: 't, KC> {
|
||||
iter: RoRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>,
|
||||
end: Bound<T>,
|
||||
}
|
||||
|
||||
impl<T> FacetNumberOperator<T> {
|
||||
/// This method can return two operations in case it must express
|
||||
/// an OR operation for the between case (i.e. `TO`).
|
||||
fn negate(self) -> (Self, Option<Self>) {
|
||||
match self {
|
||||
GreaterThan(x) => (LowerThanOrEqual(x), None),
|
||||
GreaterThanOrEqual(x) => (LowerThan(x), None),
|
||||
Equal(x) => (NotEqual(x), None),
|
||||
NotEqual(x) => (Equal(x), None),
|
||||
LowerThan(x) => (GreaterThanOrEqual(x), None),
|
||||
LowerThanOrEqual(x) => (GreaterThan(x), None),
|
||||
Between(x, y) => (LowerThan(x), Some(GreaterThan(y))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum FacetStringOperator {
|
||||
Equal(String),
|
||||
NotEqual(String),
|
||||
}
|
||||
|
||||
impl FacetStringOperator {
|
||||
fn equal(s: &str) -> Self {
|
||||
FacetStringOperator::Equal(s.to_lowercase())
|
||||
}
|
||||
|
||||
fn not_equal(s: &str) -> Self {
|
||||
FacetStringOperator::equal(s).negate()
|
||||
}
|
||||
|
||||
fn negate(self) -> Self {
|
||||
match self {
|
||||
FacetStringOperator::Equal(x) => FacetStringOperator::NotEqual(x),
|
||||
FacetStringOperator::NotEqual(x) => FacetStringOperator::Equal(x),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum FacetCondition {
|
||||
OperatorI64(u8, FacetNumberOperator<i64>),
|
||||
OperatorF64(u8, FacetNumberOperator<f64>),
|
||||
OperatorString(u8, FacetStringOperator),
|
||||
Or(Box<Self>, Box<Self>),
|
||||
And(Box<Self>, Box<Self>),
|
||||
}
|
||||
|
||||
fn get_field_id_facet_type<'a>(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashMap<u8, FacetType>,
|
||||
items: &mut Pairs<'a, Rule>,
|
||||
) -> Result<(u8, FacetType), PestError<Rule>>
|
||||
impl<'t, T: 't, KC> FacetRange<'t, T, KC>
|
||||
where
|
||||
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
||||
T: PartialOrd + Copy + Bounded,
|
||||
{
|
||||
// lexing ensures that we at least have a key
|
||||
let key = items.next().unwrap();
|
||||
let field_id = fields_ids_map
|
||||
.id(key.as_str())
|
||||
.ok_or_else(|| {
|
||||
PestError::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: format!(
|
||||
"attribute `{}` not found, available attributes are: {}",
|
||||
key.as_str(),
|
||||
fields_ids_map.iter().map(|(_, n)| n).collect::<Vec<_>>().join(", ")
|
||||
),
|
||||
},
|
||||
key.as_span(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let facet_type = faceted_fields
|
||||
.get(&field_id)
|
||||
.copied()
|
||||
.ok_or_else(|| {
|
||||
PestError::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: format!(
|
||||
"attribute `{}` is not faceted, available faceted attributes are: {}",
|
||||
key.as_str(),
|
||||
faceted_fields.keys().flat_map(|id| fields_ids_map.name(*id)).collect::<Vec<_>>().join(", ")
|
||||
),
|
||||
},
|
||||
key.as_span(),
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok((field_id, facet_type))
|
||||
}
|
||||
|
||||
fn pest_parse<T>(pair: Pair<Rule>) -> Result<T, pest::error::Error<Rule>>
|
||||
where T: FromStr,
|
||||
T::Err: ToString,
|
||||
{
|
||||
match pair.as_str().parse() {
|
||||
Ok(value) => Ok(value),
|
||||
Err(e) => {
|
||||
Err(PestError::<Rule>::new_from_span(
|
||||
ErrorVariant::CustomError { message: e.to_string() },
|
||||
pair.as_span(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FacetCondition {
|
||||
pub fn from_str(
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
expression: &str,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
{
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let faceted_fields = index.faceted_fields(rtxn)?;
|
||||
let lexed = FilterParser::parse(Rule::prgm, expression)?;
|
||||
FacetCondition::from_pairs(&fields_ids_map, &faceted_fields, lexed)
|
||||
}
|
||||
|
||||
fn from_pairs(
|
||||
fim: &FieldsIdsMap,
|
||||
ff: &HashMap<u8, FacetType>,
|
||||
expression: Pairs<Rule>,
|
||||
) -> anyhow::Result<Self>
|
||||
{
|
||||
PREC_CLIMBER.climb(
|
||||
expression,
|
||||
|pair: Pair<Rule>| match pair.as_rule() {
|
||||
Rule::greater => Ok(Self::greater_than(fim, ff, pair)?),
|
||||
Rule::geq => Ok(Self::greater_than_or_equal(fim, ff, pair)?),
|
||||
Rule::eq => Ok(Self::equal(fim, ff, pair)?),
|
||||
Rule::neq => Ok(Self::equal(fim, ff, pair)?.negate()),
|
||||
Rule::leq => Ok(Self::lower_than_or_equal(fim, ff, pair)?),
|
||||
Rule::less => Ok(Self::lower_than(fim, ff, pair)?),
|
||||
Rule::between => Ok(Self::between(fim, ff, pair)?),
|
||||
Rule::not => Ok(Self::from_pairs(fim, ff, pair.into_inner())?.negate()),
|
||||
Rule::prgm => Self::from_pairs(fim, ff, pair.into_inner()),
|
||||
Rule::term => Self::from_pairs(fim, ff, pair.into_inner()),
|
||||
_ => unreachable!(),
|
||||
},
|
||||
|lhs: anyhow::Result<Self>, op: Pair<Rule>, rhs: anyhow::Result<Self>| {
|
||||
match op.as_rule() {
|
||||
Rule::or => Ok(Or(Box::new(lhs?), Box::new(rhs?))),
|
||||
Rule::and => Ok(And(Box::new(lhs?), Box::new(rhs?))),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
fn negate(self) -> FacetCondition {
|
||||
match self {
|
||||
OperatorI64(fid, op) => match op.negate() {
|
||||
(op, None) => OperatorI64(fid, op),
|
||||
(a, Some(b)) => Or(Box::new(OperatorI64(fid, a)), Box::new(OperatorI64(fid, b))),
|
||||
},
|
||||
OperatorF64(fid, op) => match op.negate() {
|
||||
(op, None) => OperatorF64(fid, op),
|
||||
(a, Some(b)) => Or(Box::new(OperatorF64(fid, a)), Box::new(OperatorF64(fid, b))),
|
||||
},
|
||||
OperatorString(fid, op) => OperatorString(fid, op.negate()),
|
||||
Or(a, b) => And(Box::new(a.negate()), Box::new(b.negate())),
|
||||
And(a, b) => Or(Box::new(a.negate()), Box::new(b.negate())),
|
||||
}
|
||||
}
|
||||
|
||||
fn between(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashMap<u8, FacetType>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
{
|
||||
let item_span = item.as_span();
|
||||
let mut items = item.into_inner();
|
||||
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let lvalue = items.next().unwrap();
|
||||
let rvalue = items.next().unwrap();
|
||||
match ftype {
|
||||
FacetType::Integer => {
|
||||
let lvalue = pest_parse(lvalue)?;
|
||||
let rvalue = pest_parse(rvalue)?;
|
||||
Ok(OperatorI64(fid, Between(lvalue, rvalue)))
|
||||
},
|
||||
FacetType::Float => {
|
||||
let lvalue = pest_parse(lvalue)?;
|
||||
let rvalue = pest_parse(rvalue)?;
|
||||
Ok(OperatorF64(fid, Between(lvalue, rvalue)))
|
||||
},
|
||||
FacetType::String => {
|
||||
Err(PestError::<Rule>::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: "invalid operator on a faceted string".to_string(),
|
||||
},
|
||||
item_span,
|
||||
).into())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn equal(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashMap<u8, FacetType>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
{
|
||||
let mut items = item.into_inner();
|
||||
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let value = items.next().unwrap();
|
||||
match ftype {
|
||||
FacetType::Integer => Ok(OperatorI64(fid, Equal(pest_parse(value)?))),
|
||||
FacetType::Float => Ok(OperatorF64(fid, Equal(pest_parse(value)?))),
|
||||
FacetType::String => Ok(OperatorString(fid, FacetStringOperator::equal(value.as_str()))),
|
||||
}
|
||||
}
|
||||
|
||||
fn greater_than(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashMap<u8, FacetType>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
{
|
||||
let item_span = item.as_span();
|
||||
let mut items = item.into_inner();
|
||||
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let value = items.next().unwrap();
|
||||
match ftype {
|
||||
FacetType::Integer => Ok(OperatorI64(fid, GreaterThan(pest_parse(value)?))),
|
||||
FacetType::Float => Ok(OperatorF64(fid, GreaterThan(pest_parse(value)?))),
|
||||
FacetType::String => {
|
||||
Err(PestError::<Rule>::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: "invalid operator on a faceted string".to_string(),
|
||||
},
|
||||
item_span,
|
||||
).into())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn greater_than_or_equal(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashMap<u8, FacetType>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
{
|
||||
let item_span = item.as_span();
|
||||
let mut items = item.into_inner();
|
||||
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let value = items.next().unwrap();
|
||||
match ftype {
|
||||
FacetType::Integer => Ok(OperatorI64(fid, GreaterThanOrEqual(pest_parse(value)?))),
|
||||
FacetType::Float => Ok(OperatorF64(fid, GreaterThanOrEqual(pest_parse(value)?))),
|
||||
FacetType::String => {
|
||||
Err(PestError::<Rule>::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: "invalid operator on a faceted string".to_string(),
|
||||
},
|
||||
item_span,
|
||||
).into())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn lower_than(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashMap<u8, FacetType>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
{
|
||||
let item_span = item.as_span();
|
||||
let mut items = item.into_inner();
|
||||
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let value = items.next().unwrap();
|
||||
match ftype {
|
||||
FacetType::Integer => Ok(OperatorI64(fid, LowerThan(pest_parse(value)?))),
|
||||
FacetType::Float => Ok(OperatorF64(fid, LowerThan(pest_parse(value)?))),
|
||||
FacetType::String => {
|
||||
Err(PestError::<Rule>::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: "invalid operator on a faceted string".to_string(),
|
||||
},
|
||||
item_span,
|
||||
).into())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn lower_than_or_equal(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashMap<u8, FacetType>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
{
|
||||
let item_span = item.as_span();
|
||||
let mut items = item.into_inner();
|
||||
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let value = items.next().unwrap();
|
||||
match ftype {
|
||||
FacetType::Integer => Ok(OperatorI64(fid, LowerThanOrEqual(pest_parse(value)?))),
|
||||
FacetType::Float => Ok(OperatorF64(fid, LowerThanOrEqual(pest_parse(value)?))),
|
||||
FacetType::String => {
|
||||
Err(PestError::<Rule>::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: "invalid operator on a faceted string".to_string(),
|
||||
},
|
||||
item_span,
|
||||
).into())
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FacetCondition {
|
||||
/// Aggregates the documents ids that are part of the specified range automatically
|
||||
/// going deeper through the levels.
|
||||
fn explore_facet_levels<'t, T: 't, KC>(
|
||||
fn new(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
|
||||
field_id: u8,
|
||||
db: Database<KC, CboRoaringBitmapCodec>,
|
||||
field_id: FieldId,
|
||||
level: u8,
|
||||
left: Bound<T>,
|
||||
right: Bound<T>,
|
||||
output: &mut RoaringBitmap,
|
||||
) -> anyhow::Result<()>
|
||||
where
|
||||
T: Copy + PartialEq + PartialOrd + Bounded + Debug,
|
||||
KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>,
|
||||
KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>,
|
||||
) -> heed::Result<FacetRange<'t, T, KC>>
|
||||
{
|
||||
match (left, right) {
|
||||
// If the request is an exact value we must go directly to the deepest level.
|
||||
(Included(l), Included(r)) if l == r && level > 0 => {
|
||||
return Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, 0, left, right, output);
|
||||
},
|
||||
// lower TO upper when lower > upper must return no result
|
||||
(Included(l), Included(r)) if l > r => return Ok(()),
|
||||
(Included(l), Excluded(r)) if l >= r => return Ok(()),
|
||||
(Excluded(l), Excluded(r)) if l >= r => return Ok(()),
|
||||
(Excluded(l), Included(r)) if l >= r => return Ok(()),
|
||||
(_, _) => (),
|
||||
}
|
||||
|
||||
let mut left_found = None;
|
||||
let mut right_found = None;
|
||||
|
||||
// We must create a custom iterator to be able to iterate over the
|
||||
// requested range as the range iterator cannot express some conditions.
|
||||
let left_bound = match left {
|
||||
Included(left) => Included((field_id, level, left, T::min_value())),
|
||||
Excluded(left) => Excluded((field_id, level, left, T::min_value())),
|
||||
Unbounded => Unbounded,
|
||||
Unbounded => Included((field_id, level, T::min_value(), T::min_value())),
|
||||
};
|
||||
let right_bound = Included((field_id, level, T::max_value(), T::max_value()));
|
||||
// We also make sure that we don't decode the data before we are sure we must return it.
|
||||
let iter = db
|
||||
.remap_key_type::<KC>()
|
||||
.lazily_decode_data()
|
||||
.range(rtxn, &(left_bound, right_bound))?
|
||||
.take_while(|r| r.as_ref().map_or(true, |((.., r), _)| {
|
||||
match right {
|
||||
Included(right) => *r <= right,
|
||||
Excluded(right) => *r < right,
|
||||
let iter = db.lazily_decode_data().range(rtxn, &(left_bound, right_bound))?;
|
||||
Ok(FacetRange { iter, end: right })
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t, T, KC> Iterator for FacetRange<'t, T, KC>
|
||||
where
|
||||
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
||||
KC: BytesDecode<'t, DItem = (FieldId, u8, T, T)>,
|
||||
T: PartialOrd + Copy,
|
||||
{
|
||||
type Item = heed::Result<((FieldId, u8, T, T), RoaringBitmap)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.iter.next() {
|
||||
Some(Ok(((fid, level, left, right), docids))) => {
|
||||
let must_be_returned = match self.end {
|
||||
Included(end) => right <= end,
|
||||
Excluded(end) => right < end,
|
||||
Unbounded => true,
|
||||
}
|
||||
}))
|
||||
.map(|r| r.and_then(|(key, lazy)| lazy.decode().map(|data| (key, data))));
|
||||
|
||||
debug!("Iterating between {:?} and {:?} (level {})", left, right, level);
|
||||
|
||||
for (i, result) in iter.enumerate() {
|
||||
let ((_fid, level, l, r), docids) = result?;
|
||||
debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len());
|
||||
output.union_with(&docids);
|
||||
// We save the leftest and rightest bounds we actually found at this level.
|
||||
if i == 0 { left_found = Some(l); }
|
||||
right_found = Some(r);
|
||||
}
|
||||
|
||||
// Can we go deeper?
|
||||
let deeper_level = match level.checked_sub(1) {
|
||||
Some(level) => level,
|
||||
None => return Ok(()),
|
||||
};
|
||||
|
||||
// We must refine the left and right bounds of this range by retrieving the
|
||||
// missing part in a deeper level.
|
||||
match left_found.zip(right_found) {
|
||||
Some((left_found, right_found)) => {
|
||||
// If the bound is satisfied we avoid calling this function again.
|
||||
if !matches!(left, Included(l) if l == left_found) {
|
||||
let sub_right = Excluded(left_found);
|
||||
debug!("calling left with {:?} to {:?} (level {})", left, sub_right, deeper_level);
|
||||
Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, deeper_level, left, sub_right, output)?;
|
||||
}
|
||||
if !matches!(right, Included(r) if r == right_found) {
|
||||
let sub_left = Excluded(right_found);
|
||||
debug!("calling right with {:?} to {:?} (level {})", sub_left, right, deeper_level);
|
||||
Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, deeper_level, sub_left, right, output)?;
|
||||
};
|
||||
if must_be_returned {
|
||||
match docids.decode() {
|
||||
Ok(docids) => Some(Ok(((fid, level, left, right), docids))),
|
||||
Err(e) => Some(Err(e)),
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
},
|
||||
None => {
|
||||
// If we found nothing at this level it means that we must find
|
||||
// the same bounds but at a deeper, more precise level.
|
||||
Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, deeper_level, left, right, output)?;
|
||||
},
|
||||
Some(Err(e)) => Some(Err(e)),
|
||||
None => None,
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn evaluate_number_operator<'t, T: 't, KC>(
|
||||
struct FacetRevRange<'t, T: 't, KC> {
|
||||
iter: RoRevRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>,
|
||||
end: Bound<T>,
|
||||
}
|
||||
|
||||
impl<'t, T: 't, KC> FacetRevRange<'t, T, KC>
|
||||
where
|
||||
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
||||
T: PartialOrd + Copy + Bounded,
|
||||
{
|
||||
fn new(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
index: &Index,
|
||||
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
|
||||
field_id: u8,
|
||||
operator: FacetNumberOperator<T>,
|
||||
) -> anyhow::Result<RoaringBitmap>
|
||||
where
|
||||
T: Copy + PartialEq + PartialOrd + Bounded + Debug,
|
||||
KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>,
|
||||
KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>,
|
||||
db: Database<KC, CboRoaringBitmapCodec>,
|
||||
field_id: FieldId,
|
||||
level: u8,
|
||||
left: Bound<T>,
|
||||
right: Bound<T>,
|
||||
) -> heed::Result<FacetRevRange<'t, T, KC>>
|
||||
{
|
||||
// Make sure we always bound the ranges with the field id and the level,
|
||||
// as the facets values are all in the same database and prefixed by the
|
||||
// field id and the level.
|
||||
let (left, right) = match operator {
|
||||
GreaterThan(val) => (Excluded(val), Included(T::max_value())),
|
||||
GreaterThanOrEqual(val) => (Included(val), Included(T::max_value())),
|
||||
Equal(val) => (Included(val), Included(val)),
|
||||
NotEqual(val) => {
|
||||
let all_documents_ids = index.faceted_documents_ids(rtxn, field_id)?;
|
||||
let docids = Self::evaluate_number_operator::<T, KC>(rtxn, index, db, field_id, Equal(val))?;
|
||||
return Ok(all_documents_ids - docids);
|
||||
},
|
||||
LowerThan(val) => (Included(T::min_value()), Excluded(val)),
|
||||
LowerThanOrEqual(val) => (Included(T::min_value()), Included(val)),
|
||||
Between(left, right) => (Included(left), Included(right)),
|
||||
let left_bound = match left {
|
||||
Included(left) => Included((field_id, level, left, T::min_value())),
|
||||
Excluded(left) => Excluded((field_id, level, left, T::min_value())),
|
||||
Unbounded => Included((field_id, level, T::min_value(), T::min_value())),
|
||||
};
|
||||
let right_bound = Included((field_id, level, T::max_value(), T::max_value()));
|
||||
let iter = db.lazily_decode_data().rev_range(rtxn, &(left_bound, right_bound))?;
|
||||
Ok(FacetRevRange { iter, end: right })
|
||||
}
|
||||
}
|
||||
|
||||
// Ask for the biggest value that can exist for this specific field, if it exists
|
||||
// that's fine if it don't, the value just before will be returned instead.
|
||||
let biggest_level = db
|
||||
.remap_types::<KC, DecodeIgnore>()
|
||||
.get_lower_than_or_equal_to(rtxn, &(field_id, u8::MAX, T::max_value(), T::max_value()))?
|
||||
.and_then(|((id, level, _, _), _)| if id == field_id { Some(level) } else { None });
|
||||
impl<'t, T, KC> Iterator for FacetRevRange<'t, T, KC>
|
||||
where
|
||||
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
||||
KC: BytesDecode<'t, DItem = (FieldId, u8, T, T)>,
|
||||
T: PartialOrd + Copy,
|
||||
{
|
||||
type Item = heed::Result<((FieldId, u8, T, T), RoaringBitmap)>;
|
||||
|
||||
match biggest_level {
|
||||
Some(level) => {
|
||||
let mut output = RoaringBitmap::new();
|
||||
Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, level, left, right, &mut output)?;
|
||||
Ok(output)
|
||||
},
|
||||
None => Ok(RoaringBitmap::new()),
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
match self.iter.next() {
|
||||
Some(Ok(((fid, level, left, right), docids))) => {
|
||||
let must_be_returned = match self.end {
|
||||
Included(end) => right <= end,
|
||||
Excluded(end) => right < end,
|
||||
Unbounded => true,
|
||||
};
|
||||
if must_be_returned {
|
||||
match docids.decode() {
|
||||
Ok(docids) => return Some(Ok(((fid, level, left, right), docids))),
|
||||
Err(e) => return Some(Err(e)),
|
||||
}
|
||||
}
|
||||
continue;
|
||||
},
|
||||
Some(Err(e)) => return Some(Err(e)),
|
||||
None => return None,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn evaluate_string_operator(
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
db: heed::Database<FacetValueStringCodec, CboRoaringBitmapCodec>,
|
||||
field_id: u8,
|
||||
operator: &FacetStringOperator,
|
||||
) -> anyhow::Result<RoaringBitmap>
|
||||
pub struct FacetIter<'t, T: 't, KC> {
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: Database<KC, CboRoaringBitmapCodec>,
|
||||
field_id: FieldId,
|
||||
level_iters: Vec<(RoaringBitmap, Either<FacetRange<'t, T, KC>, FacetRevRange<'t, T, KC>>)>,
|
||||
}
|
||||
|
||||
impl<'t, T, KC> FacetIter<'t, T, KC>
|
||||
where
|
||||
KC: heed::BytesDecode<'t, DItem = (FieldId, u8, T, T)>,
|
||||
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
|
||||
T: PartialOrd + Copy + Bounded,
|
||||
{
|
||||
pub fn new(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
index: &'t Index,
|
||||
field_id: FieldId,
|
||||
documents_ids: RoaringBitmap,
|
||||
) -> heed::Result<FacetIter<'t, T, KC>>
|
||||
{
|
||||
match operator {
|
||||
FacetStringOperator::Equal(string) => {
|
||||
match db.get(rtxn, &(field_id, string))? {
|
||||
Some(docids) => Ok(docids),
|
||||
None => Ok(RoaringBitmap::new())
|
||||
let db = index.facet_field_id_value_docids.remap_key_type::<KC>();
|
||||
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||
let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||
Ok(FacetIter { rtxn, db, field_id, level_iters: vec![(documents_ids, Left(highest_iter))] })
|
||||
}
|
||||
|
||||
pub fn new_reverse(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
index: &'t Index,
|
||||
field_id: FieldId,
|
||||
documents_ids: RoaringBitmap,
|
||||
) -> heed::Result<FacetIter<'t, T, KC>>
|
||||
{
|
||||
let db = index.facet_field_id_value_docids.remap_key_type::<KC>();
|
||||
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||
let highest_iter = FacetRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||
Ok(FacetIter { rtxn, db, field_id, level_iters: vec![(documents_ids, Right(highest_iter))] })
|
||||
}
|
||||
|
||||
fn highest_level<X>(rtxn: &'t heed::RoTxn, db: Database<KC, X>, fid: FieldId) -> heed::Result<Option<u8>> {
|
||||
let level = db.remap_types::<ByteSlice, DecodeIgnore>()
|
||||
.prefix_iter(rtxn, &[fid][..])?
|
||||
.remap_key_type::<KC>()
|
||||
.last().transpose()?
|
||||
.map(|((_, level, _, _), _)| level);
|
||||
Ok(level)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t, T: 't, KC> Iterator for FacetIter<'t, T, KC>
|
||||
where
|
||||
KC: heed::BytesDecode<'t, DItem = (FieldId, u8, T, T)>,
|
||||
KC: for<'x> heed::BytesEncode<'x, EItem = (FieldId, u8, T, T)>,
|
||||
T: PartialOrd + Copy + Bounded + Debug,
|
||||
{
|
||||
type Item = heed::Result<(T, RoaringBitmap)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
'outer: loop {
|
||||
let (documents_ids, last) = self.level_iters.last_mut()?;
|
||||
let is_ascending = last.is_left();
|
||||
for result in last {
|
||||
// If the last iterator must find an empty set of documents it means
|
||||
// that we found all the documents in the sub level iterations already,
|
||||
// we can pop this level iterator.
|
||||
if documents_ids.is_empty() {
|
||||
break;
|
||||
}
|
||||
},
|
||||
FacetStringOperator::NotEqual(string) => {
|
||||
let all_documents_ids = index.faceted_documents_ids(rtxn, field_id)?;
|
||||
let op = FacetStringOperator::Equal(string.clone());
|
||||
let docids = Self::evaluate_string_operator(rtxn, index, db, field_id, &op)?;
|
||||
Ok(all_documents_ids - docids)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn evaluate(
|
||||
&self,
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
) -> anyhow::Result<RoaringBitmap>
|
||||
{
|
||||
let db = index.facet_field_id_value_docids;
|
||||
match self {
|
||||
OperatorI64(fid, op) => {
|
||||
Self::evaluate_number_operator::<i64, FacetLevelValueI64Codec>(rtxn, index, db, *fid, *op)
|
||||
},
|
||||
OperatorF64(fid, op) => {
|
||||
Self::evaluate_number_operator::<f64, FacetLevelValueF64Codec>(rtxn, index, db, *fid, *op)
|
||||
},
|
||||
OperatorString(fid, op) => {
|
||||
let db = db.remap_key_type::<FacetValueStringCodec>();
|
||||
Self::evaluate_string_operator(rtxn, index, db, *fid, op)
|
||||
},
|
||||
Or(lhs, rhs) => {
|
||||
let lhs = lhs.evaluate(rtxn, index)?;
|
||||
let rhs = rhs.evaluate(rtxn, index)?;
|
||||
Ok(lhs | rhs)
|
||||
},
|
||||
And(lhs, rhs) => {
|
||||
let lhs = lhs.evaluate(rtxn, index)?;
|
||||
let rhs = rhs.evaluate(rtxn, index)?;
|
||||
Ok(lhs & rhs)
|
||||
},
|
||||
match result {
|
||||
Ok(((_fid, level, left, right), mut docids)) => {
|
||||
|
||||
docids.intersect_with(&documents_ids);
|
||||
if !docids.is_empty() {
|
||||
documents_ids.difference_with(&docids);
|
||||
|
||||
if level == 0 {
|
||||
debug!("found {:?} at {:?}", docids, left);
|
||||
return Some(Ok((left, docids)));
|
||||
}
|
||||
|
||||
let rtxn = self.rtxn;
|
||||
let db = self.db;
|
||||
let fid = self.field_id;
|
||||
let left = Included(left);
|
||||
let right = Included(right);
|
||||
|
||||
debug!("calling with {:?} to {:?} (level {}) to find {:?}",
|
||||
left, right, level - 1, docids,
|
||||
);
|
||||
|
||||
let result = if is_ascending {
|
||||
FacetRange::new(rtxn, db, fid, level - 1, left, right).map(Left)
|
||||
} else {
|
||||
FacetRevRange::new(rtxn, db, fid, level - 1, left, right).map(Right)
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(iter) => {
|
||||
self.level_iters.push((docids, iter));
|
||||
continue 'outer;
|
||||
},
|
||||
Err(e) => return Some(Err(e)),
|
||||
}
|
||||
}
|
||||
},
|
||||
Err(e) => return Some(Err(e)),
|
||||
}
|
||||
}
|
||||
self.level_iters.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::update::Settings;
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::hashmap;
|
||||
|
||||
#[test]
|
||||
fn string() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Set the faceted fields to be the channel.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index);
|
||||
builder.set_faceted_fields(hashmap!{ "channel".into() => "string".into() });
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Test that the facet condition is correctly generated.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "channel = ponce").unwrap();
|
||||
let expected = OperatorString(1, FacetStringOperator::equal("Ponce"));
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "channel != ponce").unwrap();
|
||||
let expected = OperatorString(1, FacetStringOperator::not_equal("ponce"));
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap();
|
||||
let expected = OperatorString(1, FacetStringOperator::not_equal("ponce"));
|
||||
assert_eq!(condition, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn i64() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Set the faceted fields to be the channel.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index);
|
||||
builder.set_faceted_fields(hashmap!{ "timestamp".into() => "integer".into() });
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Test that the facet condition is correctly generated.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap();
|
||||
let expected = OperatorI64(1, Between(22, 44));
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap();
|
||||
let expected = Or(
|
||||
Box::new(OperatorI64(1, LowerThan(22))),
|
||||
Box::new(OperatorI64(1, GreaterThan(44))),
|
||||
);
|
||||
assert_eq!(condition, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parentheses() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Set the faceted fields to be the channel.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index);
|
||||
builder.set_searchable_fields(vec!["channel".into(), "timestamp".into()]); // to keep the fields order
|
||||
builder.set_faceted_fields(hashmap!{
|
||||
"channel".into() => "string".into(),
|
||||
"timestamp".into() => "integer".into(),
|
||||
});
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Test that the facet condition is correctly generated.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let condition = FacetCondition::from_str(
|
||||
&rtxn, &index,
|
||||
"channel = gotaga OR (timestamp 22 TO 44 AND channel != ponce)",
|
||||
).unwrap();
|
||||
let expected = Or(
|
||||
Box::new(OperatorString(0, FacetStringOperator::equal("gotaga"))),
|
||||
Box::new(And(
|
||||
Box::new(OperatorI64(1, Between(22, 44))),
|
||||
Box::new(OperatorString(0, FacetStringOperator::not_equal("ponce"))),
|
||||
))
|
||||
);
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FacetCondition::from_str(
|
||||
&rtxn, &index,
|
||||
"channel = gotaga OR NOT (timestamp 22 TO 44 AND channel != ponce)",
|
||||
).unwrap();
|
||||
let expected = Or(
|
||||
Box::new(OperatorString(0, FacetStringOperator::equal("gotaga"))),
|
||||
Box::new(Or(
|
||||
Box::new(Or(
|
||||
Box::new(OperatorI64(1, LowerThan(22))),
|
||||
Box::new(OperatorI64(1, GreaterThan(44))),
|
||||
)),
|
||||
Box::new(OperatorString(0, FacetStringOperator::equal("ponce"))),
|
||||
)),
|
||||
);
|
||||
assert_eq!(condition, expected);
|
||||
}
|
||||
}
|
||||
|
@ -1,19 +1,26 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt;
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use levenshtein_automata::DFA;
|
||||
use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
|
||||
use log::debug;
|
||||
use once_cell::sync::Lazy;
|
||||
use ordered_float::OrderedFloat;
|
||||
use roaring::bitmap::RoaringBitmap;
|
||||
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{FacetLevelValueF64Codec, FacetLevelValueI64Codec};
|
||||
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec};
|
||||
use crate::mdfs::Mdfs;
|
||||
use crate::query_tokens::{QueryTokens, QueryToken};
|
||||
use crate::{Index, DocumentId};
|
||||
use crate::{Index, FieldId, DocumentId, Criterion};
|
||||
|
||||
pub use self::facet::FacetCondition;
|
||||
pub use self::facet::{FacetCondition, FacetNumberOperator, FacetStringOperator};
|
||||
pub use self::facet::{FacetIter};
|
||||
|
||||
// Building these factories is not free.
|
||||
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
|
||||
@ -144,6 +151,84 @@ impl<'a> Search<'a> {
|
||||
candidates
|
||||
}
|
||||
|
||||
fn facet_ordered(
|
||||
&self,
|
||||
field_id: FieldId,
|
||||
facet_type: FacetType,
|
||||
ascending: bool,
|
||||
documents_ids: RoaringBitmap,
|
||||
limit: usize,
|
||||
) -> anyhow::Result<Vec<DocumentId>>
|
||||
{
|
||||
let mut limit_tmp = limit;
|
||||
let mut output = Vec::new();
|
||||
match facet_type {
|
||||
FacetType::Float => {
|
||||
if documents_ids.len() <= 1000 {
|
||||
let db = self.index.field_id_docid_facet_values.remap_key_type::<FieldDocIdFacetF64Codec>();
|
||||
let mut docids_values = Vec::with_capacity(documents_ids.len() as usize);
|
||||
for docid in documents_ids {
|
||||
let left = (field_id, docid, f64::MIN);
|
||||
let right = (field_id, docid, f64::MAX);
|
||||
let mut iter = db.range(self.rtxn, &(left..=right))?;
|
||||
let entry = if ascending { iter.next() } else { iter.last() };
|
||||
if let Some(((_, _, value), ())) = entry.transpose()? {
|
||||
docids_values.push((docid, OrderedFloat(value)));
|
||||
}
|
||||
}
|
||||
docids_values.sort_unstable_by_key(|(_, value)| *value);
|
||||
let iter = docids_values.into_iter().map(|(id, _)| id).take(limit);
|
||||
if ascending { Ok(iter.collect()) } else { Ok(iter.rev().collect()) }
|
||||
} else {
|
||||
let facet_fn = if ascending {
|
||||
FacetIter::<f64, FacetLevelValueF64Codec>::new
|
||||
} else {
|
||||
FacetIter::<f64, FacetLevelValueF64Codec>::new_reverse
|
||||
};
|
||||
for result in facet_fn(self.rtxn, self.index, field_id, documents_ids)? {
|
||||
let (_val, docids) = result?;
|
||||
limit_tmp = limit_tmp.saturating_sub(docids.len() as usize);
|
||||
output.push(docids);
|
||||
if limit_tmp == 0 { break }
|
||||
}
|
||||
Ok(output.into_iter().flatten().take(limit).collect())
|
||||
}
|
||||
},
|
||||
FacetType::Integer => {
|
||||
if documents_ids.len() <= 1000 {
|
||||
let db = self.index.field_id_docid_facet_values.remap_key_type::<FieldDocIdFacetI64Codec>();
|
||||
let mut docids_values = Vec::with_capacity(documents_ids.len() as usize);
|
||||
for docid in documents_ids {
|
||||
let left = (field_id, docid, i64::MIN);
|
||||
let right = (field_id, docid, i64::MAX);
|
||||
let mut iter = db.range(self.rtxn, &(left..=right))?;
|
||||
let entry = if ascending { iter.next() } else { iter.last() };
|
||||
if let Some(((_, _, value), ())) = entry.transpose()? {
|
||||
docids_values.push((docid, value));
|
||||
}
|
||||
}
|
||||
docids_values.sort_unstable_by_key(|(_, value)| *value);
|
||||
let iter = docids_values.into_iter().map(|(id, _)| id).take(limit);
|
||||
if ascending { Ok(iter.collect()) } else { Ok(iter.rev().collect()) }
|
||||
} else {
|
||||
let facet_fn = if ascending {
|
||||
FacetIter::<i64, FacetLevelValueI64Codec>::new
|
||||
} else {
|
||||
FacetIter::<i64, FacetLevelValueI64Codec>::new_reverse
|
||||
};
|
||||
for result in facet_fn(self.rtxn, self.index, field_id, documents_ids)? {
|
||||
let (_val, docids) = result?;
|
||||
limit_tmp = limit_tmp.saturating_sub(docids.len() as usize);
|
||||
output.push(docids);
|
||||
if limit_tmp == 0 { break }
|
||||
}
|
||||
Ok(output.into_iter().flatten().take(limit).collect())
|
||||
}
|
||||
},
|
||||
FacetType::String => bail!("criteria facet type must be a number"),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn execute(&self) -> anyhow::Result<SearchResult> {
|
||||
let limit = self.limit;
|
||||
let fst = self.index.words_fst(self.rtxn)?;
|
||||
@ -155,13 +240,34 @@ impl<'a> Search<'a> {
|
||||
};
|
||||
|
||||
// We create the original candidates with the facet conditions results.
|
||||
let before = Instant::now();
|
||||
let facet_candidates = match &self.facet_condition {
|
||||
Some(condition) => Some(condition.evaluate(self.rtxn, self.index)?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
debug!("facet candidates: {:?}", facet_candidates);
|
||||
debug!("facet candidates: {:?} took {:.02?}", facet_candidates, before.elapsed());
|
||||
|
||||
let order_by_facet = {
|
||||
let criteria = self.index.criteria(self.rtxn)?;
|
||||
let result = criteria.into_iter().flat_map(|criterion| {
|
||||
match criterion {
|
||||
Criterion::Asc(fid) => Some((fid, true)),
|
||||
Criterion::Desc(fid) => Some((fid, false)),
|
||||
_ => None
|
||||
}
|
||||
}).next();
|
||||
match result {
|
||||
Some((fid, is_ascending)) => {
|
||||
let faceted_fields = self.index.faceted_fields(self.rtxn)?;
|
||||
let ftype = *faceted_fields.get(&fid).context("unknown field id")?;
|
||||
Some((fid, ftype, is_ascending))
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
};
|
||||
|
||||
let before = Instant::now();
|
||||
let (candidates, derived_words) = match (facet_candidates, derived_words) {
|
||||
(Some(mut facet_candidates), Some(derived_words)) => {
|
||||
let words_candidates = Self::compute_candidates(&derived_words);
|
||||
@ -174,17 +280,28 @@ impl<'a> Search<'a> {
|
||||
(Some(facet_candidates), None) => {
|
||||
// If the query is not set or results in no DFAs but
|
||||
// there is some facet conditions we return a placeholder.
|
||||
let documents_ids = facet_candidates.iter().take(limit).collect();
|
||||
let documents_ids = match order_by_facet {
|
||||
Some((fid, ftype, is_ascending)) => {
|
||||
self.facet_ordered(fid, ftype, is_ascending, facet_candidates, limit)?
|
||||
},
|
||||
None => facet_candidates.iter().take(limit).collect(),
|
||||
};
|
||||
return Ok(SearchResult { documents_ids, ..Default::default() })
|
||||
},
|
||||
(None, None) => {
|
||||
// If the query is not set or results in no DFAs we return a placeholder.
|
||||
let documents_ids = self.index.documents_ids(self.rtxn)?.iter().take(limit).collect();
|
||||
let documents_ids = self.index.documents_ids(self.rtxn)?;
|
||||
let documents_ids = match order_by_facet {
|
||||
Some((fid, ftype, is_ascending)) => {
|
||||
self.facet_ordered(fid, ftype, is_ascending, documents_ids, limit)?
|
||||
},
|
||||
None => documents_ids.iter().take(limit).collect(),
|
||||
};
|
||||
return Ok(SearchResult { documents_ids, ..Default::default() })
|
||||
},
|
||||
};
|
||||
|
||||
debug!("candidates: {:?}", candidates);
|
||||
debug!("candidates: {:?} took {:.02?}", candidates, before.elapsed());
|
||||
|
||||
// The mana depth first search is a revised DFS that explore
|
||||
// solutions in the order of their proximities.
|
||||
@ -203,7 +320,19 @@ impl<'a> Search<'a> {
|
||||
}
|
||||
|
||||
let found_words = derived_words.into_iter().flat_map(|(w, _)| w).map(|(w, _)| w).collect();
|
||||
let documents_ids = documents.into_iter().flatten().take(limit).collect();
|
||||
let documents_ids = match order_by_facet {
|
||||
Some((fid, ftype, order)) => {
|
||||
let mut ordered_documents = Vec::new();
|
||||
for documents_ids in documents {
|
||||
let docids = self.facet_ordered(fid, ftype, order, documents_ids, limit)?;
|
||||
ordered_documents.push(docids);
|
||||
if ordered_documents.iter().map(Vec::len).sum::<usize>() >= limit { break }
|
||||
}
|
||||
ordered_documents.into_iter().flatten().take(limit).collect()
|
||||
},
|
||||
None => documents.into_iter().flatten().take(limit).collect(),
|
||||
};
|
||||
|
||||
Ok(SearchResult { found_words, documents_ids })
|
||||
}
|
||||
}
|
||||
|
@ -297,6 +297,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
||||
docid_word_positions,
|
||||
word_pair_proximity_docids,
|
||||
facet_field_id_value_docids,
|
||||
field_id_docid_facet_values: _,
|
||||
documents,
|
||||
} = index;
|
||||
|
||||
|
@ -19,6 +19,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
||||
docid_word_positions,
|
||||
word_pair_proximity_docids,
|
||||
facet_field_id_value_docids,
|
||||
field_id_docid_facet_values,
|
||||
documents,
|
||||
} = self.index;
|
||||
|
||||
@ -41,6 +42,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
||||
docid_word_positions.clear(self.wtxn)?;
|
||||
word_pair_proximity_docids.clear(self.wtxn)?;
|
||||
facet_field_id_value_docids.clear(self.wtxn)?;
|
||||
field_id_docid_facet_values.clear(self.wtxn)?;
|
||||
documents.clear(self.wtxn)?;
|
||||
|
||||
Ok(number_of_documents)
|
||||
|
@ -2,7 +2,9 @@ use fst::IntoStreamer;
|
||||
use heed::types::ByteSlice;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::facet::FacetType;
|
||||
use crate::{Index, BEU32, SmallString32, ExternalDocumentsIds};
|
||||
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec};
|
||||
use super::ClearDocuments;
|
||||
|
||||
pub struct DeleteDocuments<'t, 'u, 'i> {
|
||||
@ -75,6 +77,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
docid_word_positions,
|
||||
word_pair_proximity_docids,
|
||||
facet_field_id_value_docids,
|
||||
field_id_docid_facet_values,
|
||||
documents,
|
||||
} = self.index;
|
||||
|
||||
@ -186,10 +189,42 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
|
||||
// Remove the documents ids from the faceted documents ids.
|
||||
let faceted_fields = self.index.faceted_fields(self.wtxn)?;
|
||||
for (field_id, _) in faceted_fields {
|
||||
for (field_id, facet_type) in faceted_fields {
|
||||
let mut docids = self.index.faceted_documents_ids(self.wtxn, field_id)?;
|
||||
docids.difference_with(&self.documents_ids);
|
||||
self.index.put_faceted_documents_ids(self.wtxn, field_id, &docids)?;
|
||||
|
||||
// We delete the entries that are part of the documents ids.
|
||||
let iter = field_id_docid_facet_values.prefix_iter_mut(self.wtxn, &[field_id])?;
|
||||
match facet_type {
|
||||
FacetType::String => {
|
||||
let mut iter = iter.remap_key_type::<FieldDocIdFacetStringCodec>();
|
||||
while let Some(result) = iter.next() {
|
||||
let ((_fid, docid, _value), ()) = result?;
|
||||
if self.documents_ids.contains(docid) {
|
||||
iter.del_current()?;
|
||||
}
|
||||
}
|
||||
},
|
||||
FacetType::Float => {
|
||||
let mut iter = iter.remap_key_type::<FieldDocIdFacetF64Codec>();
|
||||
while let Some(result) = iter.next() {
|
||||
let ((_fid, docid, _value), ()) = result?;
|
||||
if self.documents_ids.contains(docid) {
|
||||
iter.del_current()?;
|
||||
}
|
||||
}
|
||||
},
|
||||
FacetType::Integer => {
|
||||
let mut iter = iter.remap_key_type::<FieldDocIdFacetI64Codec>();
|
||||
while let Some(result) = iter.next() {
|
||||
let ((_fid, docid, _value), ()) = result?;
|
||||
if self.documents_ids.contains(docid) {
|
||||
iter.del_current()?;
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// We delete the documents ids that are under the facet field id values.
|
||||
@ -205,6 +240,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
}
|
||||
}
|
||||
|
||||
drop(iter);
|
||||
|
||||
Ok(self.documents_ids.len() as usize)
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use anyhow::{bail, ensure};
|
||||
use anyhow::{bail, ensure, Context};
|
||||
use bstr::ByteSlice as _;
|
||||
use fst::IntoStreamer;
|
||||
use roaring::RoaringBitmap;
|
||||
@ -42,6 +42,12 @@ pub fn docid_word_positions_merge(key: &[u8], _values: &[Cow<[u8]>]) -> anyhow::
|
||||
bail!("merging docid word positions is an error ({:?})", key.as_bstr())
|
||||
}
|
||||
|
||||
pub fn field_id_docid_facet_values_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
||||
let first = values.first().context("no value to merge")?;
|
||||
ensure!(values.iter().all(|v| v == first), "invalid field id docid facet value merging");
|
||||
Ok(first.to_vec())
|
||||
}
|
||||
|
||||
pub fn words_pairs_proximities_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
||||
cbo_roaring_bitmap_merge(values)
|
||||
}
|
||||
|
@ -21,6 +21,7 @@ use self::store::{Store, Readers};
|
||||
use self::merge_function::{
|
||||
main_merge, word_docids_merge, words_pairs_proximities_docids_merge,
|
||||
docid_word_positions_merge, documents_merge, facet_field_value_docids_merge,
|
||||
field_id_docid_facet_values_merge,
|
||||
};
|
||||
pub use self::transform::{Transform, TransformOutput};
|
||||
|
||||
@ -395,6 +396,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
let mut docid_word_positions_readers = Vec::with_capacity(readers.len());
|
||||
let mut words_pairs_proximities_docids_readers = Vec::with_capacity(readers.len());
|
||||
let mut facet_field_value_docids_readers = Vec::with_capacity(readers.len());
|
||||
let mut field_id_docid_facet_values_readers = Vec::with_capacity(readers.len());
|
||||
let mut documents_readers = Vec::with_capacity(readers.len());
|
||||
readers.into_iter().for_each(|readers| {
|
||||
let Readers {
|
||||
@ -403,6 +405,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
docid_word_positions,
|
||||
words_pairs_proximities_docids,
|
||||
facet_field_value_docids,
|
||||
field_id_docid_facet_values,
|
||||
documents
|
||||
} = readers;
|
||||
main_readers.push(main);
|
||||
@ -410,6 +413,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
docid_word_positions_readers.push(docid_word_positions);
|
||||
words_pairs_proximities_docids_readers.push(words_pairs_proximities_docids);
|
||||
facet_field_value_docids_readers.push(facet_field_value_docids);
|
||||
field_id_docid_facet_values_readers.push(field_id_docid_facet_values);
|
||||
documents_readers.push(documents);
|
||||
});
|
||||
|
||||
@ -453,6 +457,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
docid_word_positions_readers,
|
||||
documents_readers,
|
||||
words_pairs_proximities_docids_readers,
|
||||
field_id_docid_facet_values_readers,
|
||||
)) as anyhow::Result<_>
|
||||
})?;
|
||||
|
||||
@ -461,6 +466,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
docid_word_positions_readers,
|
||||
documents_readers,
|
||||
words_pairs_proximities_docids_readers,
|
||||
field_id_docid_facet_values_readers,
|
||||
) = readers;
|
||||
|
||||
let mut documents_ids = self.index.documents_ids(self.wtxn)?;
|
||||
@ -488,7 +494,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
self.index.put_documents_ids(self.wtxn, &documents_ids)?;
|
||||
|
||||
let mut database_count = 0;
|
||||
let total_databases = 6;
|
||||
let total_databases = 7;
|
||||
|
||||
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||
databases_seen: 0,
|
||||
@ -525,6 +531,21 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
total_databases,
|
||||
});
|
||||
|
||||
debug!("Writing the field id docid facet values into LMDB on disk...");
|
||||
merge_into_lmdb_database(
|
||||
self.wtxn,
|
||||
*self.index.field_id_docid_facet_values.as_polymorph(),
|
||||
field_id_docid_facet_values_readers,
|
||||
field_id_docid_facet_values_merge,
|
||||
write_method,
|
||||
)?;
|
||||
|
||||
database_count += 1;
|
||||
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||
databases_seen: database_count,
|
||||
total_databases,
|
||||
});
|
||||
|
||||
debug!("Writing the words pairs proximities docids into LMDB on disk...");
|
||||
merge_into_lmdb_database(
|
||||
self.wtxn,
|
||||
|
@ -20,14 +20,15 @@ use tempfile::tempfile;
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
||||
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec};
|
||||
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec};
|
||||
use crate::tokenizer::{simple_tokenizer, only_token};
|
||||
use crate::update::UpdateIndexingStep;
|
||||
use crate::{json_to_string, SmallVec8, SmallVec32, SmallString32, Position, DocumentId};
|
||||
use crate::{json_to_string, SmallVec8, SmallVec32, SmallString32, Position, DocumentId, FieldId};
|
||||
|
||||
use super::{MergeFn, create_writer, create_sorter, writer_into_reader};
|
||||
use super::merge_function::{
|
||||
main_merge, word_docids_merge, words_pairs_proximities_docids_merge,
|
||||
facet_field_value_docids_merge,
|
||||
facet_field_value_docids_merge, field_id_docid_facet_values_merge,
|
||||
};
|
||||
|
||||
const LMDB_MAX_KEY_LENGTH: usize = 511;
|
||||
@ -42,13 +43,14 @@ pub struct Readers {
|
||||
pub docid_word_positions: Reader<FileFuse>,
|
||||
pub words_pairs_proximities_docids: Reader<FileFuse>,
|
||||
pub facet_field_value_docids: Reader<FileFuse>,
|
||||
pub field_id_docid_facet_values: Reader<FileFuse>,
|
||||
pub documents: Reader<FileFuse>,
|
||||
}
|
||||
|
||||
pub struct Store {
|
||||
// Indexing parameters
|
||||
searchable_fields: HashSet<u8>,
|
||||
faceted_fields: HashMap<u8, FacetType>,
|
||||
searchable_fields: HashSet<FieldId>,
|
||||
faceted_fields: HashMap<FieldId, FacetType>,
|
||||
// Caches
|
||||
word_docids: LinkedHashMap<SmallVec32<u8>, RoaringBitmap>,
|
||||
word_docids_limit: usize,
|
||||
@ -65,6 +67,7 @@ pub struct Store {
|
||||
word_docids_sorter: Sorter<MergeFn>,
|
||||
words_pairs_proximities_docids_sorter: Sorter<MergeFn>,
|
||||
facet_field_value_docids_sorter: Sorter<MergeFn>,
|
||||
field_id_docid_facet_values_sorter: Sorter<MergeFn>,
|
||||
// MTBL writers
|
||||
docid_word_positions_writer: Writer<File>,
|
||||
documents_writer: Writer<File>,
|
||||
@ -72,8 +75,8 @@ pub struct Store {
|
||||
|
||||
impl Store {
|
||||
pub fn new(
|
||||
searchable_fields: HashSet<u8>,
|
||||
faceted_fields: HashMap<u8, FacetType>,
|
||||
searchable_fields: HashSet<FieldId>,
|
||||
faceted_fields: HashMap<FieldId, FacetType>,
|
||||
linked_hash_map_size: Option<usize>,
|
||||
max_nb_chunks: Option<usize>,
|
||||
max_memory: Option<usize>,
|
||||
@ -118,6 +121,14 @@ impl Store {
|
||||
max_nb_chunks,
|
||||
max_memory,
|
||||
);
|
||||
let field_id_docid_facet_values_sorter = create_sorter(
|
||||
field_id_docid_facet_values_merge,
|
||||
chunk_compression_type,
|
||||
chunk_compression_level,
|
||||
chunk_fusing_shrink_size,
|
||||
max_nb_chunks,
|
||||
Some(1024 * 1024 * 1024), // 1MB
|
||||
);
|
||||
|
||||
let documents_writer = tempfile().and_then(|f| {
|
||||
create_writer(chunk_compression_type, chunk_compression_level, f)
|
||||
@ -146,6 +157,7 @@ impl Store {
|
||||
word_docids_sorter,
|
||||
words_pairs_proximities_docids_sorter,
|
||||
facet_field_value_docids_sorter,
|
||||
field_id_docid_facet_values_sorter,
|
||||
// MTBL writers
|
||||
docid_word_positions_writer,
|
||||
documents_writer,
|
||||
@ -176,11 +188,13 @@ impl Store {
|
||||
// Save the documents ids under the facet field id and value we have seen it.
|
||||
fn insert_facet_values_docid(
|
||||
&mut self,
|
||||
field_id: u8,
|
||||
field_id: FieldId,
|
||||
field_value: FacetValue,
|
||||
id: DocumentId,
|
||||
) -> anyhow::Result<()>
|
||||
{
|
||||
Self::write_field_id_docid_facet_value(&mut self.field_id_docid_facet_values_sorter, field_id, id, &field_value)?;
|
||||
|
||||
let key = (field_id, field_value);
|
||||
// if get_refresh finds the element it is assured to be at the end of the linked hash map.
|
||||
match self.facet_field_value_docids.get_refresh(&key) {
|
||||
@ -192,7 +206,7 @@ impl Store {
|
||||
// one element, this way next time we insert we doesn't grow the capacity.
|
||||
if self.facet_field_value_docids.len() == self.facet_field_value_docids_limit {
|
||||
// Removing the front element is equivalent to removing the LRU element.
|
||||
Self::write_docid_facet_field_values(
|
||||
Self::write_facet_field_value_docids(
|
||||
&mut self.facet_field_value_docids_sorter,
|
||||
self.facet_field_value_docids.pop_front(),
|
||||
)?;
|
||||
@ -243,7 +257,7 @@ impl Store {
|
||||
&mut self,
|
||||
document_id: DocumentId,
|
||||
words_positions: &mut HashMap<String, SmallVec32<Position>>,
|
||||
facet_values: &mut HashMap<u8, SmallVec8<FacetValue>>,
|
||||
facet_values: &mut HashMap<FieldId, SmallVec8<FacetValue>>,
|
||||
record: &[u8],
|
||||
) -> anyhow::Result<()>
|
||||
{
|
||||
@ -326,11 +340,11 @@ impl Store {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_docid_facet_field_values<I>(
|
||||
fn write_facet_field_value_docids<I>(
|
||||
sorter: &mut Sorter<MergeFn>,
|
||||
iter: I,
|
||||
) -> anyhow::Result<()>
|
||||
where I: IntoIterator<Item=((u8, FacetValue), RoaringBitmap)>
|
||||
where I: IntoIterator<Item=((FieldId, FacetValue), RoaringBitmap)>
|
||||
{
|
||||
use FacetValue::*;
|
||||
|
||||
@ -351,6 +365,29 @@ impl Store {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_field_id_docid_facet_value(
|
||||
sorter: &mut Sorter<MergeFn>,
|
||||
field_id: FieldId,
|
||||
document_id: DocumentId,
|
||||
value: &FacetValue,
|
||||
) -> anyhow::Result<()>
|
||||
{
|
||||
use FacetValue::*;
|
||||
|
||||
let result = match value {
|
||||
String(s) => FieldDocIdFacetStringCodec::bytes_encode(&(field_id, document_id, s)).map(Cow::into_owned),
|
||||
Float(f) => FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, **f)).map(Cow::into_owned),
|
||||
Integer(i) => FieldDocIdFacetI64Codec::bytes_encode(&(field_id, document_id, *i)).map(Cow::into_owned),
|
||||
};
|
||||
|
||||
let key = result.context("could not serialize facet key")?;
|
||||
if lmdb_key_valid_size(&key) {
|
||||
sorter.insert(&key, &[])?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_word_docids<I>(sorter: &mut Sorter<MergeFn>, iter: I) -> anyhow::Result<()>
|
||||
where I: IntoIterator<Item=(SmallVec32<u8>, RoaringBitmap)>
|
||||
{
|
||||
@ -463,7 +500,7 @@ impl Store {
|
||||
&mut self.words_pairs_proximities_docids_sorter,
|
||||
self.words_pairs_proximities_docids,
|
||||
)?;
|
||||
Self::write_docid_facet_field_values(
|
||||
Self::write_facet_field_value_docids(
|
||||
&mut self.facet_field_value_docids_sorter,
|
||||
self.facet_field_value_docids,
|
||||
)?;
|
||||
@ -491,10 +528,14 @@ impl Store {
|
||||
let mut facet_field_value_docids_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?;
|
||||
self.facet_field_value_docids_sorter.write_into(&mut facet_field_value_docids_wtr)?;
|
||||
|
||||
let mut field_id_docid_facet_values_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?;
|
||||
self.field_id_docid_facet_values_sorter.write_into(&mut field_id_docid_facet_values_wtr)?;
|
||||
|
||||
let main = writer_into_reader(main_wtr, shrink_size)?;
|
||||
let word_docids = writer_into_reader(word_docids_wtr, shrink_size)?;
|
||||
let words_pairs_proximities_docids = writer_into_reader(words_pairs_proximities_docids_wtr, shrink_size)?;
|
||||
let facet_field_value_docids = writer_into_reader(facet_field_value_docids_wtr, shrink_size)?;
|
||||
let field_id_docid_facet_values = writer_into_reader(field_id_docid_facet_values_wtr, shrink_size)?;
|
||||
let docid_word_positions = writer_into_reader(self.docid_word_positions_writer, shrink_size)?;
|
||||
let documents = writer_into_reader(self.documents_writer, shrink_size)?;
|
||||
|
||||
@ -504,6 +545,7 @@ impl Store {
|
||||
docid_word_positions,
|
||||
words_pairs_proximities_docids,
|
||||
facet_field_value_docids,
|
||||
field_id_docid_facet_values,
|
||||
documents,
|
||||
})
|
||||
}
|
||||
|
@ -10,13 +10,13 @@ use log::info;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
use crate::{BEU32, MergeFn, Index, FieldsIdsMap, ExternalDocumentsIds};
|
||||
use crate::{BEU32, MergeFn, Index, FieldId, FieldsIdsMap, ExternalDocumentsIds};
|
||||
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
|
||||
use super::merge_function::merge_two_obkvs;
|
||||
use super::{create_writer, create_sorter, IndexDocumentsMethod};
|
||||
|
||||
pub struct TransformOutput {
|
||||
pub primary_key: u8,
|
||||
pub primary_key: FieldId,
|
||||
pub fields_ids_map: FieldsIdsMap,
|
||||
pub external_documents_ids: ExternalDocumentsIds<'static>,
|
||||
pub new_documents_ids: RoaringBitmap,
|
||||
@ -365,7 +365,7 @@ impl Transform<'_, '_> {
|
||||
fn output_from_sorter<F>(
|
||||
self,
|
||||
sorter: grenad::Sorter<MergeFn>,
|
||||
primary_key: u8,
|
||||
primary_key: FieldId,
|
||||
fields_ids_map: FieldsIdsMap,
|
||||
approximate_number_of_documents: usize,
|
||||
mut external_documents_ids: ExternalDocumentsIds<'_>,
|
||||
@ -477,7 +477,7 @@ impl Transform<'_, '_> {
|
||||
// TODO this can be done in parallel by using the rayon `ThreadPool`.
|
||||
pub fn remap_index_documents(
|
||||
self,
|
||||
primary_key: u8,
|
||||
primary_key: FieldId,
|
||||
fields_ids_map: FieldsIdsMap,
|
||||
) -> anyhow::Result<TransformOutput>
|
||||
{
|
||||
|
@ -8,7 +8,7 @@ use rayon::ThreadPool;
|
||||
use crate::update::index_documents::{Transform, IndexDocumentsMethod};
|
||||
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
|
||||
use crate::facet::FacetType;
|
||||
use crate::{Index, FieldsIdsMap};
|
||||
use crate::{Index, FieldsIdsMap, Criterion};
|
||||
|
||||
pub struct Settings<'a, 't, 'u, 'i> {
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
@ -27,6 +27,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
||||
searchable_fields: Option<Option<Vec<String>>>,
|
||||
displayed_fields: Option<Option<Vec<String>>>,
|
||||
faceted_fields: Option<HashMap<String, String>>,
|
||||
criteria: Option<Option<Vec<String>>>,
|
||||
}
|
||||
|
||||
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
@ -45,6 +46,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
searchable_fields: None,
|
||||
displayed_fields: None,
|
||||
faceted_fields: None,
|
||||
criteria: None,
|
||||
}
|
||||
}
|
||||
|
||||
@ -68,6 +70,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
self.faceted_fields = Some(names_facet_types);
|
||||
}
|
||||
|
||||
pub fn reset_criteria(&mut self) {
|
||||
self.criteria = Some(None);
|
||||
}
|
||||
|
||||
pub fn set_criteria(&mut self, criteria: Vec<String>) {
|
||||
self.criteria = Some(Some(criteria));
|
||||
}
|
||||
|
||||
pub fn execute<F>(self, progress_callback: F) -> anyhow::Result<()>
|
||||
where
|
||||
F: Fn(UpdateIndexingStep) + Sync
|
||||
@ -75,6 +85,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
let mut updated_searchable_fields = None;
|
||||
let mut updated_faceted_fields = None;
|
||||
let mut updated_displayed_fields = None;
|
||||
let mut updated_criteria = None;
|
||||
|
||||
// Construct the new FieldsIdsMap based on the searchable fields order.
|
||||
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
@ -113,9 +124,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
None => fields_ids_map.insert("id").context("field id limit reached")?,
|
||||
};
|
||||
|
||||
let current_faceted_fields = self.index.faceted_fields(self.wtxn)?;
|
||||
if let Some(fields_names_facet_types) = self.faceted_fields {
|
||||
let current_faceted_fields = self.index.faceted_fields(self.wtxn)?;
|
||||
|
||||
let mut faceted_fields = HashMap::new();
|
||||
for (name, sftype) in fields_names_facet_types {
|
||||
let ftype = FacetType::from_str(&sftype).with_context(|| format!("parsing facet type {:?}", sftype))?;
|
||||
@ -147,6 +157,25 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(criteria) = self.criteria {
|
||||
match criteria {
|
||||
Some(criteria_names) => {
|
||||
let mut new_criteria = Vec::new();
|
||||
for name in criteria_names {
|
||||
let criterion = Criterion::from_str(&mut fields_ids_map, &name)?;
|
||||
if let Some(fid) = criterion.field_id() {
|
||||
let name = fields_ids_map.name(fid).unwrap();
|
||||
let faceted_fields = updated_faceted_fields.as_ref().unwrap_or(¤t_faceted_fields);
|
||||
ensure!(faceted_fields.contains_key(&fid), "criterion field {} must be faceted", name);
|
||||
}
|
||||
new_criteria.push(criterion);
|
||||
}
|
||||
updated_criteria = Some(Some(new_criteria));
|
||||
},
|
||||
None => updated_criteria = Some(None),
|
||||
}
|
||||
}
|
||||
|
||||
// If any setting have modified any of the datastructures it means that we need
|
||||
// to retrieve the documents and then reindex then with the new settings.
|
||||
if updated_searchable_fields.is_some() || updated_faceted_fields.is_some() {
|
||||
@ -202,14 +231,19 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
}
|
||||
|
||||
if let Some(displayed_fields) = updated_displayed_fields {
|
||||
// We write the displayed fields into the database here
|
||||
// to make sure that the right fields are displayed.
|
||||
match displayed_fields {
|
||||
Some(fields) => self.index.put_displayed_fields(self.wtxn, &fields)?,
|
||||
None => self.index.delete_displayed_fields(self.wtxn).map(drop)?,
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(criteria) = updated_criteria {
|
||||
match criteria {
|
||||
Some(criteria) => self.index.put_criteria(self.wtxn, &criteria)?,
|
||||
None => self.index.delete_criteria(self.wtxn).map(drop)?,
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user