2020-11-21 13:09:49 +01:00
|
|
|
#[macro_use] extern crate pest_derive;
|
|
|
|
|
2020-08-12 10:43:02 +02:00
|
|
|
mod criterion;
|
2020-11-22 14:48:42 +01:00
|
|
|
mod external_documents_ids;
|
2020-10-22 12:50:04 +02:00
|
|
|
mod fields_ids_map;
|
2020-08-13 14:15:05 +02:00
|
|
|
mod search;
|
2020-11-29 16:26:25 +01:00
|
|
|
mod update_store;
|
2020-11-11 15:48:24 +01:00
|
|
|
pub mod facet;
|
2020-08-28 14:16:37 +02:00
|
|
|
pub mod heed_codec;
|
2021-02-14 18:55:15 +01:00
|
|
|
pub mod index;
|
2020-09-22 10:53:20 +02:00
|
|
|
pub mod proximity;
|
2021-03-18 17:20:16 +01:00
|
|
|
pub mod tree_level;
|
2020-10-25 18:32:01 +01:00
|
|
|
pub mod update;
|
2020-06-04 20:25:51 +02:00
|
|
|
|
2020-10-31 16:10:15 +01:00
|
|
|
use std::borrow::Cow;
|
2020-08-13 14:15:05 +02:00
|
|
|
use std::collections::HashMap;
|
2020-05-31 16:09:34 +02:00
|
|
|
use std::hash::BuildHasherDefault;
|
2020-10-31 16:10:15 +01:00
|
|
|
|
2020-11-05 13:34:15 +01:00
|
|
|
use anyhow::Context;
|
2020-06-29 22:25:59 +02:00
|
|
|
use fxhash::{FxHasher32, FxHasher64};
|
2020-11-05 13:34:15 +01:00
|
|
|
use serde_json::{Map, Value};
|
2020-06-04 20:25:51 +02:00
|
|
|
|
2020-08-13 14:15:05 +02:00
|
|
|
pub use self::criterion::{Criterion, default_criteria};
|
2020-11-22 17:53:33 +01:00
|
|
|
pub use self::external_documents_ids::ExternalDocumentsIds;
|
2020-10-23 14:11:00 +02:00
|
|
|
pub use self::fields_ids_map::FieldsIdsMap;
|
2021-05-27 15:27:41 +02:00
|
|
|
pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, StrLevelPositionCodec, ObkvCodec, FieldIdWordCountCodec};
|
2020-11-29 16:26:25 +01:00
|
|
|
pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
2021-02-18 14:24:30 +01:00
|
|
|
pub use self::heed_codec::{RoaringBitmapLenCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec};
|
2020-10-21 15:55:48 +02:00
|
|
|
pub use self::index::Index;
|
2021-06-01 15:25:17 +02:00
|
|
|
pub use self::search::{Search, FacetDistribution, FilterCondition, SearchResult, MatchingWords};
|
2021-03-18 17:20:16 +01:00
|
|
|
pub use self::tree_level::TreeLevel;
|
2020-11-29 16:26:25 +01:00
|
|
|
pub use self::update_store::UpdateStore;
|
2020-05-31 16:09:34 +02:00
|
|
|
|
|
|
|
/// A `HashMap` whose hasher is built from `FxHasher32` through `BuildHasherDefault`.
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
2020-06-29 22:25:59 +02:00
|
|
|
/// A `HashMap` whose hasher is built from `FxHasher64` through `BuildHasherDefault`.
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
|
2020-05-31 16:09:34 +02:00
|
|
|
/// A `smallstr::SmallString` backed by a `[u8; 32]` array.
pub type SmallString32 = smallstr::SmallString<[u8; 32]>;
|
2020-06-11 11:55:03 +02:00
|
|
|
/// A `smallvec::SmallVec` backed by a `[T; 16]` array.
pub type SmallVec16<T> = smallvec::SmallVec<[T; 16]>;
|
2021-02-17 11:12:38 +01:00
|
|
|
/// A `smallvec::SmallVec` backed by a `[T; 32]` array.
pub type SmallVec32<T> = smallvec::SmallVec<[T; 32]>;
|
2020-11-13 14:49:48 +01:00
|
|
|
/// A `smallvec::SmallVec` backed by a `[T; 8]` array.
pub type SmallVec8<T> = smallvec::SmallVec<[T; 8]>;
|
2020-05-31 16:09:34 +02:00
|
|
|
/// The `heed::zerocopy` 32-bit unsigned integer type using the big-endian (`BE`) byte order.
pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>;
|
2020-10-18 15:16:57 +02:00
|
|
|
/// The `heed::zerocopy` 64-bit unsigned integer type using the big-endian (`BE`) byte order.
pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
|
2020-08-06 11:08:24 +02:00
|
|
|
/// Attributes are represented as `u32` values.
// NOTE(review): the exact meaning of an "attribute" is not visible here; confirm against the usages.
pub type Attribute = u32;
|
2020-11-26 17:38:08 +01:00
|
|
|
/// Document ids are represented as `u32` values.
// NOTE(review): presumably the internal (not user-provided) document id; confirm against the usages.
pub type DocumentId = u32;
|
|
|
|
/// Field ids are represented as `u8` values; they are the keys used to read
/// an obkv document and to resolve field names in a `FieldsIdsMap`.
pub type FieldId = u8;
|
2020-07-07 12:21:22 +02:00
|
|
|
/// Positions are represented as `u32` values.
// NOTE(review): presumably the position of a word inside a document; confirm against the usages.
pub type Position = u32;
|
2021-04-01 10:07:16 +03:00
|
|
|
/// A map from a `String` key to a `u64` value.
// NOTE(review): presumably field name -> number of documents containing that field; confirm.
pub type FieldsDistribution = HashMap<String, u64>;
|
2020-10-31 16:10:15 +01:00
|
|
|
|
|
|
|
/// Signature of a merge function: it receives a byte slice and a slice of
/// `Cow` byte slices, and returns the resulting bytes or an `anyhow` error.
// NOTE(review): presumably the arguments are (key, values to merge); confirm against the callers.
type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> anyhow::Result<Vec<u8>>;
|
2020-11-05 13:34:15 +01:00
|
|
|
|
|
|
|
/// Transform a raw obkv store into a JSON Object.
|
|
|
|
pub fn obkv_to_json(
|
2020-11-26 17:38:08 +01:00
|
|
|
displayed_fields: &[FieldId],
|
2020-11-05 13:34:15 +01:00
|
|
|
fields_ids_map: &FieldsIdsMap,
|
|
|
|
obkv: obkv::KvReader,
|
|
|
|
) -> anyhow::Result<Map<String, Value>>
|
|
|
|
{
|
|
|
|
displayed_fields.iter()
|
|
|
|
.copied()
|
|
|
|
.flat_map(|id| obkv.get(id).map(|value| (id, value)))
|
|
|
|
.map(|(id, value)| {
|
|
|
|
let name = fields_ids_map.name(id).context("unknown obkv field id")?;
|
|
|
|
let value = serde_json::from_slice(value)?;
|
|
|
|
Ok((name.to_owned(), value))
|
|
|
|
})
|
|
|
|
.collect()
|
|
|
|
}
|
2020-11-06 16:15:07 +01:00
|
|
|
|
|
|
|
/// Transform a JSON value into a string that can be indexed.
|
2020-11-11 17:33:05 +01:00
|
|
|
pub fn json_to_string(value: &Value) -> Option<String> {
|
2020-11-06 16:15:07 +01:00
|
|
|
|
2020-11-11 17:33:05 +01:00
|
|
|
fn inner(value: &Value, output: &mut String) -> bool {
|
2020-11-06 16:15:07 +01:00
|
|
|
use std::fmt::Write;
|
|
|
|
match value {
|
|
|
|
Value::Null => false,
|
|
|
|
Value::Bool(boolean) => write!(output, "{}", boolean).is_ok(),
|
|
|
|
Value::Number(number) => write!(output, "{}", number).is_ok(),
|
|
|
|
Value::String(string) => write!(output, "{}", string).is_ok(),
|
|
|
|
Value::Array(array) => {
|
|
|
|
let mut count = 0;
|
|
|
|
for value in array {
|
|
|
|
if inner(value, output) {
|
|
|
|
output.push_str(". ");
|
|
|
|
count += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// check that at least one value was written
|
|
|
|
count != 0
|
|
|
|
},
|
|
|
|
Value::Object(object) => {
|
|
|
|
let mut buffer = String::new();
|
|
|
|
let mut count = 0;
|
|
|
|
for (key, value) in object {
|
|
|
|
buffer.clear();
|
|
|
|
let _ = write!(&mut buffer, "{}: ", key);
|
|
|
|
if inner(value, &mut buffer) {
|
|
|
|
buffer.push_str(". ");
|
|
|
|
// We write the "key: value. " pair only when
|
|
|
|
// we are sure that the value can be written.
|
|
|
|
output.push_str(&buffer);
|
|
|
|
count += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// check that at least one value was written
|
|
|
|
count != 0
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
let mut string = String::new();
|
|
|
|
if inner(value, &mut string) {
|
|
|
|
Some(string)
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// An object is flattened into "key: value. " pairs, skipping `null` values.
    #[test]
    fn json_to_string_object() {
        let document = json!({
            "name": "John Doe",
            "age": 43,
            "not_there": null,
        });

        let flattened = json_to_string(&document);
        assert_eq!(flattened.as_deref(), Some("name: John Doe. age: 43. "));
    }

    /// Array elements are flattened one after the other, each followed by ". ".
    #[test]
    fn json_to_string_array() {
        let document = json!([
            { "name": "John Doe" },
            43,
            "hello",
            [ "I", "am", "fine" ],
            null,
        ]);

        let flattened = json_to_string(&document);
        // Consecutive dots (.) are acceptable here: the distance
        // of hard separators is clamped to 8 anyway.
        assert_eq!(
            flattened.as_deref(),
            Some("name: John Doe. . 43. hello. I. am. fine. . "),
        );
    }
}
|