mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 14:04:31 +01:00
Merge #225
225: Introduce the error handler r=ManyTheFish a=Kerollmops Fixes #109. Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
commit
02e0271e44
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -1377,7 +1377,6 @@ dependencies = [
|
|||||||
name = "milli"
|
name = "milli"
|
||||||
version = "0.3.1"
|
version = "0.3.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
|
||||||
"big_s",
|
"big_s",
|
||||||
"bstr",
|
"bstr",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
|
@ -55,15 +55,15 @@ pub fn base_setup(conf: &Conf) -> Index {
|
|||||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||||
options.max_readers(10);
|
options.max_readers(10);
|
||||||
let index = Index::new(options, conf.database_name).unwrap();
|
let index = Index::new(options, conf.database_name).unwrap();
|
||||||
if let Some(primary_key) = conf.primary_key {
|
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
|
||||||
index.put_primary_key(&mut wtxn, primary_key).unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
let update_builder = UpdateBuilder::new(0);
|
let update_builder = UpdateBuilder::new(0);
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = update_builder.settings(&mut wtxn, &index);
|
let mut builder = update_builder.settings(&mut wtxn, &index);
|
||||||
|
|
||||||
|
if let Some(primary_key) = conf.primary_key {
|
||||||
|
builder.set_primary_key(primary_key.to_string());
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(criterion) = conf.criterion {
|
if let Some(criterion) = conf.criterion {
|
||||||
builder.reset_filterable_fields();
|
builder.reset_filterable_fields();
|
||||||
builder.reset_criteria();
|
builder.reset_criteria();
|
||||||
|
@ -391,7 +391,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(_) => wtxn.commit().map_err(Into::into),
|
Ok(_) => wtxn.commit().map_err(Into::into),
|
||||||
Err(e) => Err(e)
|
Err(e) => Err(e.into()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
UpdateMeta::ClearDocuments => {
|
UpdateMeta::ClearDocuments => {
|
||||||
@ -401,7 +401,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
|
|
||||||
match builder.execute() {
|
match builder.execute() {
|
||||||
Ok(_count) => wtxn.commit().map_err(Into::into),
|
Ok(_count) => wtxn.commit().map_err(Into::into),
|
||||||
Err(e) => Err(e)
|
Err(e) => Err(e.into()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
UpdateMeta::Settings(settings) => {
|
UpdateMeta::Settings(settings) => {
|
||||||
@ -471,7 +471,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(_count) => wtxn.commit().map_err(Into::into),
|
Ok(_count) => wtxn.commit().map_err(Into::into),
|
||||||
Err(e) => Err(e)
|
Err(e) => Err(e.into()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
UpdateMeta::Facets(levels) => {
|
UpdateMeta::Facets(levels) => {
|
||||||
@ -486,7 +486,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
}
|
}
|
||||||
match builder.execute() {
|
match builder.execute() {
|
||||||
Ok(()) => wtxn.commit().map_err(Into::into),
|
Ok(()) => wtxn.commit().map_err(Into::into),
|
||||||
Err(e) => Err(e)
|
Err(e) => Err(e.into()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -5,55 +5,41 @@ use std::{str, io, fmt};
|
|||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use byte_unit::Byte;
|
use byte_unit::Byte;
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use milli::facet::FacetType;
|
|
||||||
use milli::{Index, TreeLevel};
|
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
|
|
||||||
|
use milli::facet::FacetType;
|
||||||
|
use milli::index::db_name::*;
|
||||||
|
use milli::{Index, TreeLevel};
|
||||||
|
|
||||||
use Command::*;
|
use Command::*;
|
||||||
|
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
#[global_allocator]
|
#[global_allocator]
|
||||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||||
|
|
||||||
const MAIN_DB_NAME: &str = "main";
|
|
||||||
const WORD_DOCIDS_DB_NAME: &str = "word-docids";
|
|
||||||
const WORD_PREFIX_DOCIDS_DB_NAME: &str = "word-prefix-docids";
|
|
||||||
const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions";
|
|
||||||
const WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-pair-proximity-docids";
|
|
||||||
const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-prefix-pair-proximity-docids";
|
|
||||||
const WORD_LEVEL_POSITION_DOCIDS_DB_NAME: &str = "word-level-position-docids";
|
|
||||||
const WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME: &str = "word-prefix-level-position-docids";
|
|
||||||
const FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME: &str = "field-id-word-count-docids";
|
|
||||||
const FACET_ID_F64_DOCIDS_DB_NAME: &str = "facet-id-f64-docids";
|
|
||||||
const FACET_ID_STRING_DOCIDS_DB_NAME: &str = "facet-id-string-docids";
|
|
||||||
const FIELD_ID_DOCID_FACET_F64S_DB_NAME: &str = "field-id-docid-facet-f64s";
|
|
||||||
const FIELD_ID_DOCID_FACET_STRINGS_DB_NAME: &str = "field-id-docid-facet-strings";
|
|
||||||
|
|
||||||
const DOCUMENTS_DB_NAME: &str = "documents";
|
|
||||||
|
|
||||||
const ALL_DATABASE_NAMES: &[&str] = &[
|
const ALL_DATABASE_NAMES: &[&str] = &[
|
||||||
MAIN_DB_NAME,
|
MAIN,
|
||||||
WORD_DOCIDS_DB_NAME,
|
WORD_DOCIDS,
|
||||||
WORD_PREFIX_DOCIDS_DB_NAME,
|
WORD_PREFIX_DOCIDS,
|
||||||
DOCID_WORD_POSITIONS_DB_NAME,
|
DOCID_WORD_POSITIONS,
|
||||||
WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME,
|
WORD_PAIR_PROXIMITY_DOCIDS,
|
||||||
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME,
|
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS,
|
||||||
WORD_LEVEL_POSITION_DOCIDS_DB_NAME,
|
WORD_LEVEL_POSITION_DOCIDS,
|
||||||
WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME,
|
WORD_PREFIX_LEVEL_POSITION_DOCIDS,
|
||||||
FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME,
|
FIELD_ID_WORD_COUNT_DOCIDS,
|
||||||
FACET_ID_F64_DOCIDS_DB_NAME,
|
FACET_ID_F64_DOCIDS,
|
||||||
FACET_ID_STRING_DOCIDS_DB_NAME,
|
FACET_ID_STRING_DOCIDS,
|
||||||
FIELD_ID_DOCID_FACET_F64S_DB_NAME,
|
FIELD_ID_DOCID_FACET_F64S,
|
||||||
FIELD_ID_DOCID_FACET_STRINGS_DB_NAME,
|
FIELD_ID_DOCID_FACET_STRINGS,
|
||||||
DOCUMENTS_DB_NAME,
|
DOCUMENTS,
|
||||||
];
|
];
|
||||||
|
|
||||||
const POSTINGS_DATABASE_NAMES: &[&str] = &[
|
const POSTINGS_DATABASE_NAMES: &[&str] = &[
|
||||||
WORD_DOCIDS_DB_NAME,
|
WORD_DOCIDS,
|
||||||
WORD_PREFIX_DOCIDS_DB_NAME,
|
WORD_PREFIX_DOCIDS,
|
||||||
DOCID_WORD_POSITIONS_DB_NAME,
|
DOCID_WORD_POSITIONS,
|
||||||
WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME,
|
WORD_PAIR_PROXIMITY_DOCIDS,
|
||||||
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME,
|
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS,
|
||||||
];
|
];
|
||||||
|
|
||||||
#[derive(Debug, StructOpt)]
|
#[derive(Debug, StructOpt)]
|
||||||
@ -944,21 +930,21 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> a
|
|||||||
|
|
||||||
for name in names {
|
for name in names {
|
||||||
let database = match name.as_str() {
|
let database = match name.as_str() {
|
||||||
MAIN_DB_NAME => &main,
|
MAIN => &main,
|
||||||
WORD_PREFIX_DOCIDS_DB_NAME => word_prefix_docids.as_polymorph(),
|
WORD_PREFIX_DOCIDS => word_prefix_docids.as_polymorph(),
|
||||||
WORD_DOCIDS_DB_NAME => word_docids.as_polymorph(),
|
WORD_DOCIDS => word_docids.as_polymorph(),
|
||||||
DOCID_WORD_POSITIONS_DB_NAME => docid_word_positions.as_polymorph(),
|
DOCID_WORD_POSITIONS => docid_word_positions.as_polymorph(),
|
||||||
WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME => word_pair_proximity_docids.as_polymorph(),
|
WORD_PAIR_PROXIMITY_DOCIDS => word_pair_proximity_docids.as_polymorph(),
|
||||||
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME => word_prefix_pair_proximity_docids.as_polymorph(),
|
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS => word_prefix_pair_proximity_docids.as_polymorph(),
|
||||||
WORD_LEVEL_POSITION_DOCIDS_DB_NAME => word_level_position_docids.as_polymorph(),
|
WORD_LEVEL_POSITION_DOCIDS => word_level_position_docids.as_polymorph(),
|
||||||
WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME => word_prefix_level_position_docids.as_polymorph(),
|
WORD_PREFIX_LEVEL_POSITION_DOCIDS => word_prefix_level_position_docids.as_polymorph(),
|
||||||
FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME => field_id_word_count_docids.as_polymorph(),
|
FIELD_ID_WORD_COUNT_DOCIDS => field_id_word_count_docids.as_polymorph(),
|
||||||
FACET_ID_F64_DOCIDS_DB_NAME => facet_id_f64_docids.as_polymorph(),
|
FACET_ID_F64_DOCIDS => facet_id_f64_docids.as_polymorph(),
|
||||||
FACET_ID_STRING_DOCIDS_DB_NAME => facet_id_string_docids.as_polymorph(),
|
FACET_ID_STRING_DOCIDS => facet_id_string_docids.as_polymorph(),
|
||||||
FIELD_ID_DOCID_FACET_F64S_DB_NAME => field_id_docid_facet_f64s.as_polymorph(),
|
FIELD_ID_DOCID_FACET_F64S => field_id_docid_facet_f64s.as_polymorph(),
|
||||||
FIELD_ID_DOCID_FACET_STRINGS_DB_NAME => field_id_docid_facet_strings.as_polymorph(),
|
FIELD_ID_DOCID_FACET_STRINGS => field_id_docid_facet_strings.as_polymorph(),
|
||||||
|
|
||||||
DOCUMENTS_DB_NAME => documents.as_polymorph(),
|
DOCUMENTS => documents.as_polymorph(),
|
||||||
unknown => anyhow::bail!("unknown database {:?}", unknown),
|
unknown => anyhow::bail!("unknown database {:?}", unknown),
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1039,27 +1025,27 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu
|
|||||||
}
|
}
|
||||||
|
|
||||||
match name {
|
match name {
|
||||||
WORD_DOCIDS_DB_NAME => {
|
WORD_DOCIDS => {
|
||||||
let db = index.word_docids.as_polymorph();
|
let db = index.word_docids.as_polymorph();
|
||||||
compute_stats::<RoaringBitmapCodec>(*db, rtxn, name)
|
compute_stats::<RoaringBitmapCodec>(*db, rtxn, name)
|
||||||
},
|
},
|
||||||
WORD_PREFIX_DOCIDS_DB_NAME => {
|
WORD_PREFIX_DOCIDS => {
|
||||||
let db = index.word_prefix_docids.as_polymorph();
|
let db = index.word_prefix_docids.as_polymorph();
|
||||||
compute_stats::<RoaringBitmapCodec>(*db, rtxn, name)
|
compute_stats::<RoaringBitmapCodec>(*db, rtxn, name)
|
||||||
},
|
},
|
||||||
DOCID_WORD_POSITIONS_DB_NAME => {
|
DOCID_WORD_POSITIONS => {
|
||||||
let db = index.docid_word_positions.as_polymorph();
|
let db = index.docid_word_positions.as_polymorph();
|
||||||
compute_stats::<BoRoaringBitmapCodec>(*db, rtxn, name)
|
compute_stats::<BoRoaringBitmapCodec>(*db, rtxn, name)
|
||||||
},
|
},
|
||||||
WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME => {
|
WORD_PAIR_PROXIMITY_DOCIDS => {
|
||||||
let db = index.word_pair_proximity_docids.as_polymorph();
|
let db = index.word_pair_proximity_docids.as_polymorph();
|
||||||
compute_stats::<CboRoaringBitmapCodec>(*db, rtxn, name)
|
compute_stats::<CboRoaringBitmapCodec>(*db, rtxn, name)
|
||||||
},
|
},
|
||||||
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME => {
|
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS => {
|
||||||
let db = index.word_prefix_pair_proximity_docids.as_polymorph();
|
let db = index.word_prefix_pair_proximity_docids.as_polymorph();
|
||||||
compute_stats::<CboRoaringBitmapCodec>(*db, rtxn, name)
|
compute_stats::<CboRoaringBitmapCodec>(*db, rtxn, name)
|
||||||
},
|
},
|
||||||
FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME => {
|
FIELD_ID_WORD_COUNT_DOCIDS => {
|
||||||
let db = index.field_id_word_count_docids.as_polymorph();
|
let db = index.field_id_word_count_docids.as_polymorph();
|
||||||
compute_stats::<CboRoaringBitmapCodec>(*db, rtxn, name)
|
compute_stats::<CboRoaringBitmapCodec>(*db, rtxn, name)
|
||||||
},
|
},
|
||||||
|
@ -5,7 +5,6 @@ authors = ["Kerollmops <clement@meilisearch.com>"]
|
|||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0.38"
|
|
||||||
bstr = "0.2.15"
|
bstr = "0.2.15"
|
||||||
byteorder = "1.4.2"
|
byteorder = "1.4.2"
|
||||||
chrono = { version = "0.4.19", features = ["serde"] }
|
chrono = { version = "0.4.19", features = ["serde"] }
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
use anyhow::{Context, bail};
|
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use serde::{Serialize, Deserialize};
|
use serde::{Serialize, Deserialize};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
|
|
||||||
|
use crate::error::{Error, UserError};
|
||||||
|
|
||||||
static ASC_DESC_REGEX: Lazy<Regex> = Lazy::new(|| {
|
static ASC_DESC_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||||
Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap()
|
Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap()
|
||||||
});
|
});
|
||||||
@ -41,7 +42,7 @@ impl Criterion {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl FromStr for Criterion {
|
impl FromStr for Criterion {
|
||||||
type Err = anyhow::Error;
|
type Err = Error;
|
||||||
|
|
||||||
fn from_str(txt: &str) -> Result<Criterion, Self::Err> {
|
fn from_str(txt: &str) -> Result<Criterion, Self::Err> {
|
||||||
match txt {
|
match txt {
|
||||||
@ -51,13 +52,15 @@ impl FromStr for Criterion {
|
|||||||
"attribute" => Ok(Criterion::Attribute),
|
"attribute" => Ok(Criterion::Attribute),
|
||||||
"exactness" => Ok(Criterion::Exactness),
|
"exactness" => Ok(Criterion::Exactness),
|
||||||
text => {
|
text => {
|
||||||
let caps = ASC_DESC_REGEX.captures(text).with_context(|| format!("unknown criterion name: {}", text))?;
|
let caps = ASC_DESC_REGEX.captures(text).ok_or_else(|| {
|
||||||
|
UserError::InvalidCriterionName { name: text.to_string() }
|
||||||
|
})?;
|
||||||
let order = caps.get(1).unwrap().as_str();
|
let order = caps.get(1).unwrap().as_str();
|
||||||
let field_name = caps.get(2).unwrap().as_str();
|
let field_name = caps.get(2).unwrap().as_str();
|
||||||
match order {
|
match order {
|
||||||
"asc" => Ok(Criterion::Asc(field_name.to_string())),
|
"asc" => Ok(Criterion::Asc(field_name.to_string())),
|
||||||
"desc" => Ok(Criterion::Desc(field_name.to_string())),
|
"desc" => Ok(Criterion::Desc(field_name.to_string())),
|
||||||
otherwise => bail!("unknown criterion name: {}", otherwise),
|
text => return Err(UserError::InvalidCriterionName { name: text.to_string() }.into()),
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
265
milli/src/error.rs
Normal file
265
milli/src/error.rs
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
use std::convert::Infallible;
|
||||||
|
use std::error::Error as StdError;
|
||||||
|
use std::{fmt, io, str};
|
||||||
|
|
||||||
|
use heed::{MdbError, Error as HeedError};
|
||||||
|
use rayon::ThreadPoolBuildError;
|
||||||
|
use serde_json::{Map, Value};
|
||||||
|
|
||||||
|
use crate::search::ParserRule;
|
||||||
|
use crate::{DocumentId, FieldId};
|
||||||
|
|
||||||
|
/// A JSON object: a `serde_json` map from string keys to JSON values.
pub type Object = Map<String, Value>;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum Error {
|
||||||
|
InternalError(InternalError),
|
||||||
|
IoError(io::Error),
|
||||||
|
UserError(UserError),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum InternalError {
|
||||||
|
DatabaseClosing,
|
||||||
|
DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> },
|
||||||
|
FieldIdMapMissingEntry(FieldIdMapMissingEntry),
|
||||||
|
Fst(fst::Error),
|
||||||
|
GrenadInvalidCompressionType,
|
||||||
|
IndexingMergingKeys { process: &'static str },
|
||||||
|
InvalidDatabaseTyping,
|
||||||
|
RayonThreadPool(ThreadPoolBuildError),
|
||||||
|
SerdeJson(serde_json::Error),
|
||||||
|
Serialization(SerializationError),
|
||||||
|
Store(MdbError),
|
||||||
|
Utf8(str::Utf8Error),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Describes a value that could not be encoded or decoded.
///
/// `db_name` identifies the database involved when the caller knows it;
/// errors converted from `heed` carry `None`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SerializationError {
    /// Decoding a stored value failed.
    Decoding { db_name: Option<&'static str> },
    /// Encoding a value failed.
    Encoding { db_name: Option<&'static str> },
    /// A number is not a valid finite number.
    InvalidNumberSerialization,
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum FieldIdMapMissingEntry {
|
||||||
|
FieldId { field_id: FieldId, process: &'static str },
|
||||||
|
FieldName { field_name: String, process: &'static str },
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum UserError {
|
||||||
|
AttributeLimitReached,
|
||||||
|
Csv(csv::Error),
|
||||||
|
MaxDatabaseSizeReached,
|
||||||
|
DocumentLimitReached,
|
||||||
|
InvalidFilter(pest::error::Error<ParserRule>),
|
||||||
|
InvalidCriterionName { name: String },
|
||||||
|
InvalidDocumentId { document_id: Value },
|
||||||
|
InvalidFilterAttribute(pest::error::Error<ParserRule>),
|
||||||
|
InvalidStoreFile,
|
||||||
|
MissingDocumentId { document: Object },
|
||||||
|
MissingPrimaryKey,
|
||||||
|
NoSpaceLeftOnDevice,
|
||||||
|
PrimaryKeyCannotBeChanged,
|
||||||
|
PrimaryKeyCannotBeReset,
|
||||||
|
SerdeJson(serde_json::Error),
|
||||||
|
UnknownInternalDocumentId { document_id: DocumentId },
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<io::Error> for Error {
|
||||||
|
fn from(error: io::Error) -> Error {
|
||||||
|
// TODO must be improved and more precise
|
||||||
|
Error::IoError(error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<fst::Error> for Error {
|
||||||
|
fn from(error: fst::Error) -> Error {
|
||||||
|
Error::InternalError(InternalError::Fst(error))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<E> From<grenad::Error<E>> for Error where Error: From<E> {
|
||||||
|
fn from(error: grenad::Error<E>) -> Error {
|
||||||
|
match error {
|
||||||
|
grenad::Error::Io(error) => Error::IoError(error),
|
||||||
|
grenad::Error::Merge(error) => Error::from(error),
|
||||||
|
grenad::Error::InvalidCompressionType => {
|
||||||
|
Error::InternalError(InternalError::GrenadInvalidCompressionType)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<str::Utf8Error> for Error {
|
||||||
|
fn from(error: str::Utf8Error) -> Error {
|
||||||
|
Error::InternalError(InternalError::Utf8(error))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Infallible> for Error {
|
||||||
|
fn from(_error: Infallible) -> Error {
|
||||||
|
unreachable!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<HeedError> for Error {
|
||||||
|
fn from(error: HeedError) -> Error {
|
||||||
|
use self::Error::*;
|
||||||
|
use self::InternalError::*;
|
||||||
|
use self::SerializationError::*;
|
||||||
|
use self::UserError::*;
|
||||||
|
|
||||||
|
match error {
|
||||||
|
HeedError::Io(error) => Error::from(error),
|
||||||
|
HeedError::Mdb(MdbError::MapFull) => UserError(MaxDatabaseSizeReached),
|
||||||
|
HeedError::Mdb(MdbError::Invalid) => UserError(InvalidStoreFile),
|
||||||
|
HeedError::Mdb(error) => InternalError(Store(error)),
|
||||||
|
HeedError::Encoding => InternalError(Serialization(Encoding { db_name: None })),
|
||||||
|
HeedError::Decoding => InternalError(Serialization(Decoding { db_name: None })),
|
||||||
|
HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping),
|
||||||
|
HeedError::DatabaseClosing => InternalError(DatabaseClosing),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<ThreadPoolBuildError> for Error {
|
||||||
|
fn from(error: ThreadPoolBuildError) -> Error {
|
||||||
|
Error::InternalError(InternalError::RayonThreadPool(error))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<FieldIdMapMissingEntry> for Error {
|
||||||
|
fn from(error: FieldIdMapMissingEntry) -> Error {
|
||||||
|
Error::InternalError(InternalError::FieldIdMapMissingEntry(error))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Wraps an [`InternalError`] into the top-level [`Error`].
impl From<InternalError> for Error {
    fn from(source: InternalError) -> Self {
        Self::InternalError(source)
    }
}
|
||||||
|
|
||||||
|
/// Wraps a [`UserError`] into the top-level [`Error`].
impl From<UserError> for Error {
    fn from(source: UserError) -> Self {
        Self::UserError(source)
    }
}
|
||||||
|
|
||||||
|
impl From<SerializationError> for Error {
|
||||||
|
fn from(error: SerializationError) -> Error {
|
||||||
|
Error::InternalError(InternalError::Serialization(error))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for Error {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::InternalError(error) => write!(f, "internal: {}", error),
|
||||||
|
Self::IoError(error) => error.fmt(f),
|
||||||
|
Self::UserError(error) => error.fmt(f),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Empty impl: every `std::error::Error` method keeps its default implementation.
impl StdError for Error {}
|
||||||
|
|
||||||
|
impl fmt::Display for InternalError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::DatabaseMissingEntry { db_name, key } => {
|
||||||
|
write!(f, "missing {} in the {} database", key.unwrap_or("key"), db_name)
|
||||||
|
},
|
||||||
|
Self::FieldIdMapMissingEntry(error) => error.fmt(f),
|
||||||
|
Self::Fst(error) => error.fmt(f),
|
||||||
|
Self::GrenadInvalidCompressionType => {
|
||||||
|
f.write_str("invalid compression type have been specified to grenad")
|
||||||
|
},
|
||||||
|
Self::IndexingMergingKeys { process } => {
|
||||||
|
write!(f, "invalid merge while processing {}", process)
|
||||||
|
},
|
||||||
|
Self::Serialization(error) => error.fmt(f),
|
||||||
|
Self::InvalidDatabaseTyping => HeedError::InvalidDatabaseTyping.fmt(f),
|
||||||
|
Self::RayonThreadPool(error) => error.fmt(f),
|
||||||
|
Self::SerdeJson(error) => error.fmt(f),
|
||||||
|
Self::DatabaseClosing => HeedError::DatabaseClosing.fmt(f),
|
||||||
|
Self::Store(error) => error.fmt(f),
|
||||||
|
Self::Utf8(error) => error.fmt(f),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Empty impl: every `std::error::Error` method keeps its default implementation.
impl StdError for InternalError {}
|
||||||
|
|
||||||
|
impl fmt::Display for UserError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::AttributeLimitReached => f.write_str("maximum number of attributes reached"),
|
||||||
|
Self::Csv(error) => error.fmt(f),
|
||||||
|
Self::DocumentLimitReached => f.write_str("maximum number of documents reached"),
|
||||||
|
Self::InvalidFilter(error) => error.fmt(f),
|
||||||
|
Self::InvalidCriterionName { name } => write!(f, "invalid criterion {}", name),
|
||||||
|
Self::InvalidDocumentId { document_id } => {
|
||||||
|
let json = serde_json::to_string(document_id).unwrap();
|
||||||
|
write!(f, "document identifier is invalid {}", json)
|
||||||
|
},
|
||||||
|
Self::InvalidFilterAttribute(error) => error.fmt(f),
|
||||||
|
Self::MissingDocumentId { document } => {
|
||||||
|
let json = serde_json::to_string(document).unwrap();
|
||||||
|
write!(f, "document doesn't have an identifier {}", json)
|
||||||
|
},
|
||||||
|
Self::MissingPrimaryKey => f.write_str("missing primary key"),
|
||||||
|
Self::MaxDatabaseSizeReached => f.write_str("maximum database size reached"),
|
||||||
|
// TODO where can we find it instead of writing the text ourselves?
|
||||||
|
Self::NoSpaceLeftOnDevice => f.write_str("no space left on device"),
|
||||||
|
Self::InvalidStoreFile => f.write_str("store file is not a valid database file"),
|
||||||
|
Self::PrimaryKeyCannotBeChanged => {
|
||||||
|
f.write_str("primary key cannot be changed if the database contains documents")
|
||||||
|
},
|
||||||
|
Self::PrimaryKeyCannotBeReset => {
|
||||||
|
f.write_str("primary key cannot be reset if the database contains documents")
|
||||||
|
},
|
||||||
|
Self::SerdeJson(error) => error.fmt(f),
|
||||||
|
Self::UnknownInternalDocumentId { document_id } => {
|
||||||
|
write!(f, "an unknown internal document id have been used ({})", document_id)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Empty impl: every `std::error::Error` method keeps its default implementation.
impl StdError for UserError {}
|
||||||
|
|
||||||
|
impl fmt::Display for FieldIdMapMissingEntry {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::FieldId { field_id, process } => {
|
||||||
|
write!(f, "unknown field id {} coming from the {} process", field_id, process)
|
||||||
|
},
|
||||||
|
Self::FieldName { field_name, process } => {
|
||||||
|
write!(f, "unknown field name {} coming from the {} process", field_name, process)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Empty impl: every `std::error::Error` method keeps its default implementation.
impl StdError for FieldIdMapMissingEntry {}
|
||||||
|
|
||||||
|
impl fmt::Display for SerializationError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::Decoding { db_name: Some(name) } => {
|
||||||
|
write!(f, "decoding from the {} database failed", name)
|
||||||
|
},
|
||||||
|
Self::Decoding { db_name: None } => f.write_str("decoding failed"),
|
||||||
|
Self::Encoding { db_name: Some(name) } => {
|
||||||
|
write!(f, "encoding into the {} database failed", name)
|
||||||
|
},
|
||||||
|
Self::Encoding { db_name: None } => f.write_str("encoding failed"),
|
||||||
|
Self::InvalidNumberSerialization => f.write_str("number is not a valid finite number"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Empty impl: every `std::error::Error` method keeps its default implementation.
impl StdError for SerializationError {}
|
@ -5,6 +5,14 @@ use crate::FieldId;
|
|||||||
|
|
||||||
pub struct FacetValueStringCodec;
|
pub struct FacetValueStringCodec;
|
||||||
|
|
||||||
|
impl FacetValueStringCodec {
|
||||||
|
pub fn serialize_into(field_id: FieldId, value: &str, out: &mut Vec<u8>) {
|
||||||
|
out.reserve(value.len() + 1);
|
||||||
|
out.push(field_id);
|
||||||
|
out.extend_from_slice(value.as_bytes());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec {
|
impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec {
|
||||||
type DItem = (FieldId, &'a str);
|
type DItem = (FieldId, &'a str);
|
||||||
|
|
||||||
@ -19,9 +27,8 @@ impl<'a> heed::BytesEncode<'a> for FacetValueStringCodec {
|
|||||||
type EItem = (FieldId, &'a str);
|
type EItem = (FieldId, &'a str);
|
||||||
|
|
||||||
fn bytes_encode((field_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
fn bytes_encode((field_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||||
let mut bytes = Vec::with_capacity(value.len() + 1);
|
let mut bytes = Vec::new();
|
||||||
bytes.push(*field_id);
|
FacetValueStringCodec::serialize_into(*field_id, value, &mut bytes);
|
||||||
bytes.extend_from_slice(value.as_bytes());
|
|
||||||
Some(Cow::Owned(bytes))
|
Some(Cow::Owned(bytes))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,6 +6,15 @@ use crate::{FieldId, DocumentId};
|
|||||||
|
|
||||||
pub struct FieldDocIdFacetStringCodec;
|
pub struct FieldDocIdFacetStringCodec;
|
||||||
|
|
||||||
|
impl FieldDocIdFacetStringCodec {
|
||||||
|
pub fn serialize_into(field_id: FieldId, document_id: DocumentId, value: &str, out: &mut Vec<u8>) {
|
||||||
|
out.reserve(1 + 4 + value.len());
|
||||||
|
out.push(field_id);
|
||||||
|
out.extend_from_slice(&document_id.to_be_bytes());
|
||||||
|
out.extend_from_slice(value.as_bytes());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetStringCodec {
|
impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetStringCodec {
|
||||||
type DItem = (FieldId, DocumentId, &'a str);
|
type DItem = (FieldId, DocumentId, &'a str);
|
||||||
|
|
||||||
@ -22,10 +31,8 @@ impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetStringCodec {
|
|||||||
type EItem = (FieldId, DocumentId, &'a str);
|
type EItem = (FieldId, DocumentId, &'a str);
|
||||||
|
|
||||||
fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||||
let mut bytes = Vec::with_capacity(1 + 4 + value.len());
|
let mut bytes = Vec::new();
|
||||||
bytes.push(*field_id);
|
FieldDocIdFacetStringCodec::serialize_into(*field_id, *document_id, value, &mut bytes);
|
||||||
bytes.extend_from_slice(&document_id.to_be_bytes());
|
|
||||||
bytes.extend_from_slice(value.as_bytes());
|
|
||||||
Some(Cow::Owned(bytes))
|
Some(Cow::Owned(bytes))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,6 +6,13 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
pub struct BoRoaringBitmapCodec;
|
pub struct BoRoaringBitmapCodec;
|
||||||
|
|
||||||
|
impl BoRoaringBitmapCodec {
|
||||||
|
pub fn serialize_into(bitmap: &RoaringBitmap, out: &mut Vec<u8>) {
|
||||||
|
out.reserve(bitmap.len() as usize * size_of::<u32>());
|
||||||
|
bitmap.iter().map(u32::to_ne_bytes).for_each(|bytes| out.extend_from_slice(&bytes));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl heed::BytesDecode<'_> for BoRoaringBitmapCodec {
|
impl heed::BytesDecode<'_> for BoRoaringBitmapCodec {
|
||||||
type DItem = RoaringBitmap;
|
type DItem = RoaringBitmap;
|
||||||
|
|
||||||
@ -25,12 +32,8 @@ impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
|
|||||||
type EItem = RoaringBitmap;
|
type EItem = RoaringBitmap;
|
||||||
|
|
||||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||||
let mut out = Vec::with_capacity(item.len() as usize * size_of::<u32>());
|
let mut out = Vec::new();
|
||||||
|
BoRoaringBitmapCodec::serialize_into(item, &mut out);
|
||||||
item.iter()
|
|
||||||
.map(|i| i.to_ne_bytes())
|
|
||||||
.for_each(|bytes| out.extend_from_slice(&bytes));
|
|
||||||
|
|
||||||
Some(Cow::Owned(out))
|
Some(Cow::Owned(out))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -23,18 +23,17 @@ impl CboRoaringBitmapCodec {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn serialize_into(roaring: &RoaringBitmap, vec: &mut Vec<u8>) -> io::Result<()> {
|
pub fn serialize_into(roaring: &RoaringBitmap, vec: &mut Vec<u8>) {
|
||||||
if roaring.len() <= THRESHOLD as u64 {
|
if roaring.len() <= THRESHOLD as u64 {
|
||||||
// If the number of items (u32s) to encode is less than or equal to the threshold
|
// If the number of items (u32s) to encode is less than or equal to the threshold
|
||||||
// it means that it would weigh the same or less than the RoaringBitmap
|
// it means that it would weigh the same or less than the RoaringBitmap
|
||||||
// header, so we directly encode them using ByteOrder instead.
|
// header, so we directly encode them using ByteOrder instead.
|
||||||
for integer in roaring {
|
for integer in roaring {
|
||||||
vec.write_u32::<NativeEndian>(integer)?;
|
vec.write_u32::<NativeEndian>(integer).unwrap();
|
||||||
}
|
}
|
||||||
Ok(())
|
|
||||||
} else {
|
} else {
|
||||||
// Otherwise, we use the classic RoaringBitmapCodec that writes a header.
|
// Otherwise, we use the classic RoaringBitmapCodec that writes a header.
|
||||||
roaring.serialize_into(vec)
|
roaring.serialize_into(vec).unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -68,7 +67,7 @@ impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
|
|||||||
|
|
||||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||||
let mut vec = Vec::with_capacity(Self::serialized_size(item));
|
let mut vec = Vec::with_capacity(Self::serialized_size(item));
|
||||||
Self::serialize_into(item, &mut vec).ok()?;
|
Self::serialize_into(item, &mut vec);
|
||||||
Some(Cow::Owned(vec))
|
Some(Cow::Owned(vec))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,14 +2,14 @@ use std::borrow::Cow;
|
|||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use anyhow::Context;
|
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use heed::{Database, PolyDatabase, RoTxn, RwTxn};
|
use heed::{Database, PolyDatabase, RoTxn, RwTxn};
|
||||||
use heed::types::*;
|
use heed::types::*;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::error::{UserError, FieldIdMapMissingEntry, InternalError};
|
||||||
use crate::{Criterion, default_criteria, FacetDistribution, FieldsDistribution, Search};
|
use crate::{Criterion, default_criteria, FacetDistribution, FieldsDistribution, Search};
|
||||||
use crate::{BEU32, DocumentId, ExternalDocumentsIds, FieldId};
|
use crate::{BEU32, DocumentId, ExternalDocumentsIds, FieldId, Result};
|
||||||
use crate::{
|
use crate::{
|
||||||
BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec,
|
BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec,
|
||||||
ObkvCodec, RoaringBitmapCodec, RoaringBitmapLenCodec, StrLevelPositionCodec, StrStrU8Codec,
|
ObkvCodec, RoaringBitmapCodec, RoaringBitmapLenCodec, StrLevelPositionCodec, StrStrU8Codec,
|
||||||
@ -21,25 +21,44 @@ use crate::heed_codec::facet::{
|
|||||||
};
|
};
|
||||||
use crate::fields_ids_map::FieldsIdsMap;
|
use crate::fields_ids_map::FieldsIdsMap;
|
||||||
|
|
||||||
pub const CRITERIA_KEY: &str = "criteria";
|
pub mod main_key {
|
||||||
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
|
pub const CRITERIA_KEY: &str = "criteria";
|
||||||
pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
|
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
|
||||||
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
|
pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
|
||||||
pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
|
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
|
||||||
pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution";
|
pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
|
||||||
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution";
|
||||||
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
|
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
||||||
pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
|
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
|
||||||
pub const PRIMARY_KEY_KEY: &str = "primary-key";
|
pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
|
||||||
pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields";
|
pub const PRIMARY_KEY_KEY: &str = "primary-key";
|
||||||
pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
|
pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields";
|
||||||
pub const STOP_WORDS_KEY: &str = "stop-words";
|
pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
|
||||||
pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids";
|
pub const STOP_WORDS_KEY: &str = "stop-words";
|
||||||
pub const SYNONYMS_KEY: &str = "synonyms";
|
pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids";
|
||||||
pub const WORDS_FST_KEY: &str = "words-fst";
|
pub const SYNONYMS_KEY: &str = "synonyms";
|
||||||
pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
|
pub const WORDS_FST_KEY: &str = "words-fst";
|
||||||
const CREATED_AT_KEY: &str = "created-at";
|
pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
|
||||||
const UPDATED_AT_KEY: &str = "updated-at";
|
pub const CREATED_AT_KEY: &str = "created-at";
|
||||||
|
pub const UPDATED_AT_KEY: &str = "updated-at";
|
||||||
|
}
|
||||||
|
|
||||||
|
pub mod db_name {
|
||||||
|
pub const MAIN: &str = "main";
|
||||||
|
pub const WORD_DOCIDS: &str = "word-docids";
|
||||||
|
pub const WORD_PREFIX_DOCIDS: &str = "word-prefix-docids";
|
||||||
|
pub const DOCID_WORD_POSITIONS: &str = "docid-word-positions";
|
||||||
|
pub const WORD_PAIR_PROXIMITY_DOCIDS: &str = "word-pair-proximity-docids";
|
||||||
|
pub const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS: &str = "word-prefix-pair-proximity-docids";
|
||||||
|
pub const WORD_LEVEL_POSITION_DOCIDS: &str = "word-level-position-docids";
|
||||||
|
pub const WORD_PREFIX_LEVEL_POSITION_DOCIDS: &str = "word-prefix-level-position-docids";
|
||||||
|
pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids";
|
||||||
|
pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids";
|
||||||
|
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
|
||||||
|
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
|
||||||
|
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
|
||||||
|
pub const DOCUMENTS: &str = "documents";
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Index {
|
pub struct Index {
|
||||||
@ -84,24 +103,26 @@ pub struct Index {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Index {
|
impl Index {
|
||||||
pub fn new<P: AsRef<Path>>(mut options: heed::EnvOpenOptions, path: P) -> anyhow::Result<Index> {
|
pub fn new<P: AsRef<Path>>(mut options: heed::EnvOpenOptions, path: P) -> Result<Index> {
|
||||||
|
use db_name::*;
|
||||||
|
|
||||||
options.max_dbs(14);
|
options.max_dbs(14);
|
||||||
|
|
||||||
let env = options.open(path)?;
|
let env = options.open(path)?;
|
||||||
let main = env.create_poly_database(Some("main"))?;
|
let main = env.create_poly_database(Some(MAIN))?;
|
||||||
let word_docids = env.create_database(Some("word-docids"))?;
|
let word_docids = env.create_database(Some(WORD_DOCIDS))?;
|
||||||
let word_prefix_docids = env.create_database(Some("word-prefix-docids"))?;
|
let word_prefix_docids = env.create_database(Some(WORD_PREFIX_DOCIDS))?;
|
||||||
let docid_word_positions = env.create_database(Some("docid-word-positions"))?;
|
let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?;
|
||||||
let word_pair_proximity_docids = env.create_database(Some("word-pair-proximity-docids"))?;
|
let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
|
||||||
let word_prefix_pair_proximity_docids = env.create_database(Some("word-prefix-pair-proximity-docids"))?;
|
let word_prefix_pair_proximity_docids = env.create_database(Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?;
|
||||||
let word_level_position_docids = env.create_database(Some("word-level-position-docids"))?;
|
let word_level_position_docids = env.create_database(Some(WORD_LEVEL_POSITION_DOCIDS))?;
|
||||||
let field_id_word_count_docids = env.create_database(Some("field-id-word-count-docids"))?;
|
let field_id_word_count_docids = env.create_database(Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
|
||||||
let word_prefix_level_position_docids = env.create_database(Some("word-prefix-level-position-docids"))?;
|
let word_prefix_level_position_docids = env.create_database(Some(WORD_PREFIX_LEVEL_POSITION_DOCIDS))?;
|
||||||
let facet_id_f64_docids = env.create_database(Some("facet-id-f64-docids"))?;
|
let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?;
|
||||||
let facet_id_string_docids = env.create_database(Some("facet-id-string-docids"))?;
|
let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
|
||||||
let field_id_docid_facet_f64s = env.create_database(Some("field-id-docid-facet-f64s"))?;
|
let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?;
|
||||||
let field_id_docid_facet_strings = env.create_database(Some("field-id-docid-facet-strings"))?;
|
let field_id_docid_facet_strings = env.create_database(Some(FIELD_ID_DOCID_FACET_STRINGS))?;
|
||||||
let documents = env.create_database(Some("documents"))?;
|
let documents = env.create_database(Some(DOCUMENTS))?;
|
||||||
|
|
||||||
Index::initialize_creation_dates(&env, main)?;
|
Index::initialize_creation_dates(&env, main)?;
|
||||||
|
|
||||||
@ -127,10 +148,10 @@ impl Index {
|
|||||||
fn initialize_creation_dates(env: &heed::Env, main: PolyDatabase) -> heed::Result<()> {
|
fn initialize_creation_dates(env: &heed::Env, main: PolyDatabase) -> heed::Result<()> {
|
||||||
let mut txn = env.write_txn()?;
|
let mut txn = env.write_txn()?;
|
||||||
// The db was just created, we update its metadata with the relevant information.
|
// The db was just created, we update its metadata with the relevant information.
|
||||||
if main.get::<_, Str, SerdeJson<DateTime<Utc>>>(&txn, CREATED_AT_KEY)?.is_none() {
|
if main.get::<_, Str, SerdeJson<DateTime<Utc>>>(&txn, main_key::CREATED_AT_KEY)?.is_none() {
|
||||||
let now = Utc::now();
|
let now = Utc::now();
|
||||||
main.put::<_, Str, SerdeJson<DateTime<Utc>>>(&mut txn, UPDATED_AT_KEY, &now)?;
|
main.put::<_, Str, SerdeJson<DateTime<Utc>>>(&mut txn, main_key::UPDATED_AT_KEY, &now)?;
|
||||||
main.put::<_, Str, SerdeJson<DateTime<Utc>>>(&mut txn, CREATED_AT_KEY, &now)?;
|
main.put::<_, Str, SerdeJson<DateTime<Utc>>>(&mut txn, main_key::CREATED_AT_KEY, &now)?;
|
||||||
txn.commit()?;
|
txn.commit()?;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -163,43 +184,43 @@ impl Index {
|
|||||||
/* documents ids */
|
/* documents ids */
|
||||||
|
|
||||||
/// Writes the documents ids that correspond to the user-ids-documents-ids FST.
|
/// Writes the documents ids that correspond to the user-ids-documents-ids FST.
|
||||||
pub fn put_documents_ids(&self, wtxn: &mut RwTxn, docids: &RoaringBitmap) -> heed::Result<()> {
|
pub(crate) fn put_documents_ids(&self, wtxn: &mut RwTxn, docids: &RoaringBitmap) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, RoaringBitmapCodec>(wtxn, DOCUMENTS_IDS_KEY, docids)
|
self.main.put::<_, Str, RoaringBitmapCodec>(wtxn, main_key::DOCUMENTS_IDS_KEY, docids)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the internal documents ids.
|
/// Returns the internal documents ids.
|
||||||
pub fn documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
|
pub fn documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
|
||||||
Ok(self.main.get::<_, Str, RoaringBitmapCodec>(rtxn, DOCUMENTS_IDS_KEY)?.unwrap_or_default())
|
Ok(self.main.get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)?.unwrap_or_default())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the number of documents indexed in the database.
|
/// Returns the number of documents indexed in the database.
|
||||||
pub fn number_of_documents(&self, rtxn: &RoTxn) -> anyhow::Result<u64> {
|
pub fn number_of_documents(&self, rtxn: &RoTxn) -> Result<u64> {
|
||||||
let count = self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, DOCUMENTS_IDS_KEY)?;
|
let count = self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)?;
|
||||||
Ok(count.unwrap_or_default())
|
Ok(count.unwrap_or_default())
|
||||||
}
|
}
|
||||||
|
|
||||||
/* primary key */
|
/* primary key */
|
||||||
|
|
||||||
/// Writes the documents primary key, this is the field name that is used to store the id.
|
/// Writes the documents primary key, this is the field name that is used to store the id.
|
||||||
pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> {
|
pub(crate) fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> {
|
||||||
self.set_updated_at(wtxn, &Utc::now())?;
|
self.set_updated_at(wtxn, &Utc::now())?;
|
||||||
self.main.put::<_, Str, Str>(wtxn, PRIMARY_KEY_KEY, &primary_key)
|
self.main.put::<_, Str, Str>(wtxn, main_key::PRIMARY_KEY_KEY, &primary_key)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Deletes the primary key of the documents, this can be done to reset indexes settings.
|
/// Deletes the primary key of the documents, this can be done to reset indexes settings.
|
||||||
pub fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, PRIMARY_KEY_KEY)
|
self.main.delete::<_, Str>(wtxn, main_key::PRIMARY_KEY_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the documents primary key, `None` if it hasn't been defined.
|
/// Returns the documents primary key, `None` if it hasn't been defined.
|
||||||
pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t str>> {
|
pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t str>> {
|
||||||
self.main.get::<_, Str, Str>(rtxn, PRIMARY_KEY_KEY)
|
self.main.get::<_, Str, Str>(rtxn, main_key::PRIMARY_KEY_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* external documents ids */
|
/* external documents ids */
|
||||||
|
|
||||||
/// Writes the external documents ids and internal ids (i.e. `u32`).
|
/// Writes the external documents ids and internal ids (i.e. `u32`).
|
||||||
pub fn put_external_documents_ids<'a>(
|
pub(crate) fn put_external_documents_ids<'a>(
|
||||||
&self,
|
&self,
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
external_documents_ids: &ExternalDocumentsIds<'a>,
|
external_documents_ids: &ExternalDocumentsIds<'a>,
|
||||||
@ -208,16 +229,16 @@ impl Index {
|
|||||||
let ExternalDocumentsIds { hard, soft } = external_documents_ids;
|
let ExternalDocumentsIds { hard, soft } = external_documents_ids;
|
||||||
let hard = hard.as_fst().as_bytes();
|
let hard = hard.as_fst().as_bytes();
|
||||||
let soft = soft.as_fst().as_bytes();
|
let soft = soft.as_fst().as_bytes();
|
||||||
self.main.put::<_, Str, ByteSlice>(wtxn, HARD_EXTERNAL_DOCUMENTS_IDS_KEY, hard)?;
|
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::HARD_EXTERNAL_DOCUMENTS_IDS_KEY, hard)?;
|
||||||
self.main.put::<_, Str, ByteSlice>(wtxn, SOFT_EXTERNAL_DOCUMENTS_IDS_KEY, soft)?;
|
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::SOFT_EXTERNAL_DOCUMENTS_IDS_KEY, soft)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the external documents ids map which associates the external ids
|
/// Returns the external documents ids map which associates the external ids
|
||||||
/// with the internal ids (i.e. `u32`).
|
/// with the internal ids (i.e. `u32`).
|
||||||
pub fn external_documents_ids<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<ExternalDocumentsIds<'t>> {
|
pub fn external_documents_ids<'t>(&self, rtxn: &'t RoTxn) -> Result<ExternalDocumentsIds<'t>> {
|
||||||
let hard = self.main.get::<_, Str, ByteSlice>(rtxn, HARD_EXTERNAL_DOCUMENTS_IDS_KEY)?;
|
let hard = self.main.get::<_, Str, ByteSlice>(rtxn, main_key::HARD_EXTERNAL_DOCUMENTS_IDS_KEY)?;
|
||||||
let soft = self.main.get::<_, Str, ByteSlice>(rtxn, SOFT_EXTERNAL_DOCUMENTS_IDS_KEY)?;
|
let soft = self.main.get::<_, Str, ByteSlice>(rtxn, main_key::SOFT_EXTERNAL_DOCUMENTS_IDS_KEY)?;
|
||||||
let hard = match hard {
|
let hard = match hard {
|
||||||
Some(hard) => fst::Map::new(hard)?.map_data(Cow::Borrowed)?,
|
Some(hard) => fst::Map::new(hard)?.map_data(Cow::Borrowed)?,
|
||||||
None => fst::Map::default().map_data(Cow::Owned)?,
|
None => fst::Map::default().map_data(Cow::Owned)?,
|
||||||
@ -233,93 +254,112 @@ impl Index {
|
|||||||
|
|
||||||
/// Writes the fields ids map which associates the documents keys with an internal field id
|
/// Writes the fields ids map which associates the documents keys with an internal field id
|
||||||
/// (i.e. `u8`), this field id is used to identify fields in the obkv documents.
|
/// (i.e. `u8`), this field id is used to identify fields in the obkv documents.
|
||||||
pub fn put_fields_ids_map(&self, wtxn: &mut RwTxn, map: &FieldsIdsMap) -> heed::Result<()> {
|
pub(crate) fn put_fields_ids_map(&self, wtxn: &mut RwTxn, map: &FieldsIdsMap) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, SerdeJson<FieldsIdsMap>>(wtxn, FIELDS_IDS_MAP_KEY, map)
|
self.main.put::<_, Str, SerdeJson<FieldsIdsMap>>(wtxn, main_key::FIELDS_IDS_MAP_KEY, map)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the fields ids map which associates the documents keys with an internal field id
|
/// Returns the fields ids map which associates the documents keys with an internal field id
|
||||||
/// (i.e. `u8`), this field id is used to identify fields in the obkv documents.
|
/// (i.e. `u8`), this field id is used to identify fields in the obkv documents.
|
||||||
pub fn fields_ids_map(&self, rtxn: &RoTxn) -> heed::Result<FieldsIdsMap> {
|
pub fn fields_ids_map(&self, rtxn: &RoTxn) -> heed::Result<FieldsIdsMap> {
|
||||||
Ok(self.main.get::<_, Str, SerdeJson<FieldsIdsMap>>(rtxn, FIELDS_IDS_MAP_KEY)?.unwrap_or_default())
|
Ok(self.main.get::<_, Str, SerdeJson<FieldsIdsMap>>(
|
||||||
|
rtxn,
|
||||||
|
main_key::FIELDS_IDS_MAP_KEY,
|
||||||
|
)?.unwrap_or_default())
|
||||||
}
|
}
|
||||||
|
|
||||||
/* fields distribution */
|
/* fields distribution */
|
||||||
|
|
||||||
/// Writes the fields distribution which associates every field name with
|
/// Writes the fields distribution which associates every field name with
|
||||||
/// the number of times it occurs in the documents.
|
/// the number of times it occurs in the documents.
|
||||||
pub fn put_fields_distribution(&self, wtxn: &mut RwTxn, distribution: &FieldsDistribution) -> heed::Result<()> {
|
pub(crate) fn put_fields_distribution(&self, wtxn: &mut RwTxn, distribution: &FieldsDistribution) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, SerdeJson<FieldsDistribution>>(wtxn, FIELDS_DISTRIBUTION_KEY, distribution)
|
self.main.put::<_, Str, SerdeJson<FieldsDistribution>>(wtxn, main_key::FIELDS_DISTRIBUTION_KEY, distribution)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the fields distribution which associates every field name with
|
/// Returns the fields distribution which associates every field name with
|
||||||
/// the number of times it occurs in the documents.
|
/// the number of times it occurs in the documents.
|
||||||
pub fn fields_distribution(&self, rtxn: &RoTxn) -> heed::Result<FieldsDistribution> {
|
pub fn fields_distribution(&self, rtxn: &RoTxn) -> heed::Result<FieldsDistribution> {
|
||||||
Ok(self.main.get::<_, Str, SerdeJson<FieldsDistribution>>(rtxn, FIELDS_DISTRIBUTION_KEY)?.unwrap_or_default())
|
Ok(self.main.get::<_, Str, SerdeJson<FieldsDistribution>>(
|
||||||
|
rtxn,
|
||||||
|
main_key::FIELDS_DISTRIBUTION_KEY,
|
||||||
|
)?.unwrap_or_default())
|
||||||
}
|
}
|
||||||
|
|
||||||
/* displayed fields */
|
/* displayed fields */
|
||||||
|
|
||||||
/// Writes the fields that must be displayed in the defined order.
|
/// Writes the fields that must be displayed in the defined order.
|
||||||
/// There must not be any duplicate field id.
|
/// There must not be any duplicate field id.
|
||||||
pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
|
pub(crate) fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, DISPLAYED_FIELDS_KEY, &fields)
|
self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, main_key::DISPLAYED_FIELDS_KEY, &fields)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Deletes the displayed fields ids, this will make the engine display
|
/// Deletes the displayed fields ids, this will make the engine display
|
||||||
/// all the documents attributes in the order of the `FieldsIdsMap`.
|
/// all the documents attributes in the order of the `FieldsIdsMap`.
|
||||||
pub fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, DISPLAYED_FIELDS_KEY)
|
self.main.delete::<_, Str>(wtxn, main_key::DISPLAYED_FIELDS_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the displayed fields in the order they were set by the user. If it returns
|
/// Returns the displayed fields in the order they were set by the user. If it returns
|
||||||
/// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`.
|
/// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`.
|
||||||
pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
|
pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
|
||||||
self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, DISPLAYED_FIELDS_KEY)
|
self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, main_key::DISPLAYED_FIELDS_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn displayed_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<Option<Vec<FieldId>>> {
|
/// Identical to `displayed_fields`, but returns the ids instead.
|
||||||
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
pub fn displayed_fields_ids(&self, rtxn: &RoTxn) -> Result<Option<Vec<FieldId>>> {
|
||||||
let ids = self.displayed_fields(rtxn)?
|
match self.displayed_fields(rtxn)? {
|
||||||
.map(|fields| fields
|
Some(fields) => {
|
||||||
.into_iter()
|
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||||
.map(|name| fields_ids_map.id(name).expect("Field not found"))
|
let mut fields_ids = Vec::new();
|
||||||
.collect::<Vec<_>>());
|
for name in fields.into_iter() {
|
||||||
Ok(ids)
|
match fields_ids_map.id(name) {
|
||||||
|
Some(field_id) => fields_ids.push(field_id),
|
||||||
|
None => return Err(FieldIdMapMissingEntry::FieldName {
|
||||||
|
field_name: name.to_string(),
|
||||||
|
process: "Index::displayed_fields_ids",
|
||||||
|
}.into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(Some(fields_ids))
|
||||||
|
},
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* searchable fields */
|
/* searchable fields */
|
||||||
|
|
||||||
/// Writes the searchable fields, when this list is specified, only these are indexed.
|
/// Writes the searchable fields, when this list is specified, only these are indexed.
|
||||||
pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
|
pub(crate) fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, SEARCHABLE_FIELDS_KEY, &fields)
|
self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, main_key::SEARCHABLE_FIELDS_KEY, &fields)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Deletes the searchable fields, when no fields are specified, all fields are indexed.
|
/// Deletes the searchable fields, when no fields are specified, all fields are indexed.
|
||||||
pub fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, SEARCHABLE_FIELDS_KEY)
|
self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the searchable fields, those are the fields that are indexed,
|
/// Returns the searchable fields, those are the fields that are indexed,
|
||||||
/// if the searchable fields aren't there it means that **all** the fields are indexed.
|
/// if the searchable fields aren't there it means that **all** the fields are indexed.
|
||||||
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
|
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
|
||||||
self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, SEARCHABLE_FIELDS_KEY)
|
self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, main_key::SEARCHABLE_FIELDS_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Identical to `searchable_fields`, but returns the ids instead.
|
/// Identical to `searchable_fields`, but returns the ids instead.
|
||||||
pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<Option<Vec<FieldId>>> {
|
pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Option<Vec<FieldId>>> {
|
||||||
match self.searchable_fields(rtxn)? {
|
match self.searchable_fields(rtxn)? {
|
||||||
Some(names) => {
|
Some(fields) => {
|
||||||
let fields_map = self.fields_ids_map(rtxn)?;
|
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||||
let mut ids = Vec::new();
|
let mut fields_ids = Vec::new();
|
||||||
for name in names {
|
for name in fields {
|
||||||
let id = fields_map
|
match fields_ids_map.id(name) {
|
||||||
.id(name)
|
Some(field_id) => fields_ids.push(field_id),
|
||||||
.ok_or_else(|| format!("field id map must contain {:?}", name))
|
None => return Err(FieldIdMapMissingEntry::FieldName {
|
||||||
.expect("corrupted data: ");
|
field_name: name.to_string(),
|
||||||
ids.push(id);
|
process: "Index::searchable_fields_ids",
|
||||||
|
}.into()),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Ok(Some(ids))
|
Ok(Some(fields_ids))
|
||||||
}
|
},
|
||||||
None => Ok(None),
|
None => Ok(None),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -327,35 +367,42 @@ impl Index {
|
|||||||
/* filterable fields */
|
/* filterable fields */
|
||||||
|
|
||||||
/// Writes the filterable fields names in the database.
|
/// Writes the filterable fields names in the database.
|
||||||
pub fn put_filterable_fields(&self, wtxn: &mut RwTxn, fields: &HashSet<String>) -> heed::Result<()> {
|
pub(crate) fn put_filterable_fields(&self, wtxn: &mut RwTxn, fields: &HashSet<String>) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, SerdeJson<_>>(wtxn, FILTERABLE_FIELDS_KEY, fields)
|
self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::FILTERABLE_FIELDS_KEY, fields)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Deletes the filterable fields ids in the database.
|
/// Deletes the filterable fields ids in the database.
|
||||||
pub fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, FILTERABLE_FIELDS_KEY)
|
self.main.delete::<_, Str>(wtxn, main_key::FILTERABLE_FIELDS_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the filterable fields names.
|
/// Returns the filterable fields names.
|
||||||
pub fn filterable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
|
pub fn filterable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
|
||||||
Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FILTERABLE_FIELDS_KEY)?.unwrap_or_default())
|
Ok(self.main.get::<_, Str, SerdeJson<_>>(
|
||||||
|
rtxn,
|
||||||
|
main_key::FILTERABLE_FIELDS_KEY,
|
||||||
|
)?.unwrap_or_default())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Same as `filterable_fields`, but returns ids instead.
|
/// Identical to `filterable_fields`, but returns ids instead.
|
||||||
pub fn filterable_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<HashSet<FieldId>> {
|
pub fn filterable_fields_ids(&self, rtxn: &RoTxn) -> Result<HashSet<FieldId>> {
|
||||||
let filterable_fields = self.filterable_fields(rtxn)?;
|
let fields = self.filterable_fields(rtxn)?;
|
||||||
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||||
let filterable_fields = filterable_fields
|
|
||||||
.iter()
|
|
||||||
.map(|k| {
|
|
||||||
fields_ids_map
|
|
||||||
.id(k)
|
|
||||||
.ok_or_else(|| format!("{:?} should be present in the field id map", k))
|
|
||||||
.expect("corrupted data: ")
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
Ok(filterable_fields)
|
let mut fields_ids = HashSet::new();
|
||||||
|
for name in fields {
|
||||||
|
match fields_ids_map.id(&name) {
|
||||||
|
Some(field_id) => {
|
||||||
|
fields_ids.insert(field_id);
|
||||||
|
},
|
||||||
|
None => return Err(FieldIdMapMissingEntry::FieldName {
|
||||||
|
field_name: name,
|
||||||
|
process: "Index::filterable_fields_ids",
|
||||||
|
}.into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(fields_ids)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* faceted documents ids */
|
/* faceted documents ids */
|
||||||
@ -363,7 +410,7 @@ impl Index {
|
|||||||
/// Returns the faceted fields names.
|
/// Returns the faceted fields names.
|
||||||
///
|
///
|
||||||
/// Faceted fields are the union of all the filterable, distinct, and Asc/Desc fields.
|
/// Faceted fields are the union of all the filterable, distinct, and Asc/Desc fields.
|
||||||
pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
|
pub fn faceted_fields(&self, rtxn: &RoTxn) -> Result<HashSet<String>> {
|
||||||
let filterable_fields = self.filterable_fields(rtxn)?;
|
let filterable_fields = self.filterable_fields(rtxn)?;
|
||||||
let distinct_field = self.distinct_field(rtxn)?;
|
let distinct_field = self.distinct_field(rtxn)?;
|
||||||
let asc_desc_fields = self.criteria(rtxn)?
|
let asc_desc_fields = self.criteria(rtxn)?
|
||||||
@ -382,36 +429,40 @@ impl Index {
|
|||||||
Ok(faceted_fields)
|
Ok(faceted_fields)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Same as `faceted_fields`, but returns ids instead.
|
/// Identical to `faceted_fields`, but returns ids instead.
|
||||||
pub fn faceted_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<HashSet<FieldId>> {
|
pub fn faceted_fields_ids(&self, rtxn: &RoTxn) -> Result<HashSet<FieldId>> {
|
||||||
let faceted_fields = self.faceted_fields(rtxn)?;
|
let fields = self.faceted_fields(rtxn)?;
|
||||||
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||||
let faceted_fields = faceted_fields
|
|
||||||
.iter()
|
|
||||||
.map(|k| {
|
|
||||||
fields_ids_map
|
|
||||||
.id(k)
|
|
||||||
.ok_or_else(|| format!("{:?} should be present in the field id map", k))
|
|
||||||
.expect("corrupted data: ")
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
Ok(faceted_fields)
|
let mut fields_ids = HashSet::new();
|
||||||
|
for name in fields.into_iter() {
|
||||||
|
match fields_ids_map.id(&name) {
|
||||||
|
Some(field_id) => {
|
||||||
|
fields_ids.insert(field_id);
|
||||||
|
},
|
||||||
|
None => return Err(FieldIdMapMissingEntry::FieldName {
|
||||||
|
field_name: name,
|
||||||
|
process: "Index::faceted_fields_ids",
|
||||||
|
}.into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(fields_ids)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* faceted documents ids */
|
/* faceted documents ids */
|
||||||
|
|
||||||
/// Writes the documents ids that are faceted with numbers under this field id.
|
/// Writes the documents ids that are faceted with numbers under this field id.
|
||||||
pub fn put_number_faceted_documents_ids(
|
pub(crate) fn put_number_faceted_documents_ids(
|
||||||
&self,
|
&self,
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
docids: &RoaringBitmap,
|
docids: &RoaringBitmap,
|
||||||
) -> heed::Result<()>
|
) -> heed::Result<()>
|
||||||
{
|
{
|
||||||
let mut buffer = [0u8; STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
|
let mut buffer = [0u8; main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
|
||||||
buffer[..STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()]
|
buffer[..main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()]
|
||||||
.copy_from_slice(STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
|
.copy_from_slice(main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
|
||||||
*buffer.last_mut().unwrap() = field_id;
|
*buffer.last_mut().unwrap() = field_id;
|
||||||
self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids)
|
self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids)
|
||||||
}
|
}
|
||||||
@ -423,9 +474,9 @@ impl Index {
|
|||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
) -> heed::Result<RoaringBitmap>
|
) -> heed::Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
let mut buffer = [0u8; STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
|
let mut buffer = [0u8; main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
|
||||||
buffer[..STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()]
|
buffer[..main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()]
|
||||||
.copy_from_slice(STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
|
.copy_from_slice(main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
|
||||||
*buffer.last_mut().unwrap() = field_id;
|
*buffer.last_mut().unwrap() = field_id;
|
||||||
match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? {
|
match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? {
|
||||||
Some(docids) => Ok(docids),
|
Some(docids) => Ok(docids),
|
||||||
@ -434,16 +485,16 @@ impl Index {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Writes the documents ids that are faceted with strings under this field id.
|
/// Writes the documents ids that are faceted with strings under this field id.
|
||||||
pub fn put_string_faceted_documents_ids(
|
pub(crate) fn put_string_faceted_documents_ids(
|
||||||
&self,
|
&self,
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
docids: &RoaringBitmap,
|
docids: &RoaringBitmap,
|
||||||
) -> heed::Result<()>
|
) -> heed::Result<()>
|
||||||
{
|
{
|
||||||
let mut buffer = [0u8; NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
|
let mut buffer = [0u8; main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
|
||||||
buffer[..NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()]
|
buffer[..main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()]
|
||||||
.copy_from_slice(NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
|
.copy_from_slice(main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
|
||||||
*buffer.last_mut().unwrap() = field_id;
|
*buffer.last_mut().unwrap() = field_id;
|
||||||
self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids)
|
self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids)
|
||||||
}
|
}
|
||||||
@ -455,9 +506,9 @@ impl Index {
|
|||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
) -> heed::Result<RoaringBitmap>
|
) -> heed::Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
let mut buffer = [0u8; NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
|
let mut buffer = [0u8; main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
|
||||||
buffer[..NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()]
|
buffer[..main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()]
|
||||||
.copy_from_slice(NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
|
.copy_from_slice(main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
|
||||||
*buffer.last_mut().unwrap() = field_id;
|
*buffer.last_mut().unwrap() = field_id;
|
||||||
match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? {
|
match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? {
|
||||||
Some(docids) => Ok(docids),
|
Some(docids) => Ok(docids),
|
||||||
@ -468,29 +519,29 @@ impl Index {
|
|||||||
/* distinct field */
|
/* distinct field */
|
||||||
|
|
||||||
pub(crate) fn put_distinct_field(&self, wtxn: &mut RwTxn, distinct_field: &str) -> heed::Result<()> {
|
pub(crate) fn put_distinct_field(&self, wtxn: &mut RwTxn, distinct_field: &str) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, Str>(wtxn, DISTINCT_FIELD_KEY, distinct_field)
|
self.main.put::<_, Str, Str>(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
|
pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
|
||||||
self.main.get::<_, Str, Str>(rtxn, DISTINCT_FIELD_KEY)
|
self.main.get::<_, Str, Str>(rtxn, main_key::DISTINCT_FIELD_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, DISTINCT_FIELD_KEY)
|
self.main.delete::<_, Str>(wtxn, main_key::DISTINCT_FIELD_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* criteria */
|
/* criteria */
|
||||||
|
|
||||||
pub fn put_criteria(&self, wtxn: &mut RwTxn, criteria: &[Criterion]) -> heed::Result<()> {
|
pub(crate) fn put_criteria(&self, wtxn: &mut RwTxn, criteria: &[Criterion]) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, CRITERIA_KEY, &criteria)
|
self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, CRITERIA_KEY)
|
self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result<Vec<Criterion>> {
|
pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result<Vec<Criterion>> {
|
||||||
match self.main.get::<_, Str, SerdeJson<Vec<Criterion>>>(rtxn, CRITERIA_KEY)? {
|
match self.main.get::<_, Str, SerdeJson<Vec<Criterion>>>(rtxn, main_key::CRITERIA_KEY)? {
|
||||||
Some(criteria) => Ok(criteria),
|
Some(criteria) => Ok(criteria),
|
||||||
None => Ok(default_criteria()),
|
None => Ok(default_criteria()),
|
||||||
}
|
}
|
||||||
@ -499,13 +550,13 @@ impl Index {
|
|||||||
/* words fst */
|
/* words fst */
|
||||||
|
|
||||||
/// Writes the FST which is the words dictionary of the engine.
|
/// Writes the FST which is the words dictionary of the engine.
|
||||||
pub fn put_words_fst<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
|
pub(crate) fn put_words_fst<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, ByteSlice>(wtxn, WORDS_FST_KEY, fst.as_fst().as_bytes())
|
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_FST_KEY, fst.as_fst().as_bytes())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the FST which is the words dictionary of the engine.
|
/// Returns the FST which is the words dictionary of the engine.
|
||||||
pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<fst::Set<Cow<'t, [u8]>>> {
|
pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
|
||||||
match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_FST_KEY)? {
|
match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_FST_KEY)? {
|
||||||
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
|
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
|
||||||
None => Ok(fst::Set::default().map_data(Cow::Owned)?),
|
None => Ok(fst::Set::default().map_data(Cow::Owned)?),
|
||||||
}
|
}
|
||||||
@ -513,16 +564,16 @@ impl Index {
|
|||||||
|
|
||||||
/* stop words */
|
/* stop words */
|
||||||
|
|
||||||
pub fn put_stop_words<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
|
pub(crate) fn put_stop_words<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, ByteSlice>(wtxn, STOP_WORDS_KEY, fst.as_fst().as_bytes())
|
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, STOP_WORDS_KEY)
|
self.main.delete::<_, Str>(wtxn, main_key::STOP_WORDS_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<Option<fst::Set<&'t [u8]>>> {
|
pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<fst::Set<&'t [u8]>>> {
|
||||||
match self.main.get::<_, Str, ByteSlice>(rtxn, STOP_WORDS_KEY)? {
|
match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::STOP_WORDS_KEY)? {
|
||||||
Some(bytes) => Ok(Some(fst::Set::new(bytes)?)),
|
Some(bytes) => Ok(Some(fst::Set::new(bytes)?)),
|
||||||
None => Ok(None),
|
None => Ok(None),
|
||||||
}
|
}
|
||||||
@ -530,19 +581,29 @@ impl Index {
|
|||||||
|
|
||||||
/* synonyms */
|
/* synonyms */
|
||||||
|
|
||||||
pub fn put_synonyms(&self, wtxn: &mut RwTxn, synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>) -> heed::Result<()> {
|
pub(crate) fn put_synonyms(
|
||||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, SYNONYMS_KEY, synonyms)
|
&self,
|
||||||
|
wtxn: &mut RwTxn,
|
||||||
|
synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>,
|
||||||
|
) -> heed::Result<()>
|
||||||
|
{
|
||||||
|
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, SYNONYMS_KEY)
|
self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
|
pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
|
||||||
Ok(self.main.get::<_, Str, SerdeBincode<_>>(rtxn, SYNONYMS_KEY)?.unwrap_or_default())
|
Ok(self.main.get::<_, Str, SerdeBincode<_>>(rtxn, main_key::SYNONYMS_KEY)?.unwrap_or_default())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn words_synonyms<S: AsRef<str>>(&self, rtxn: &RoTxn, words: &[S]) -> heed::Result<Option<Vec<Vec<String>>>> {
|
pub fn words_synonyms<S: AsRef<str>>(
|
||||||
|
&self,
|
||||||
|
rtxn: &RoTxn,
|
||||||
|
words: &[S],
|
||||||
|
) -> heed::Result<Option<Vec<Vec<String>>>>
|
||||||
|
{
|
||||||
let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect();
|
let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect();
|
||||||
Ok(self.synonyms(rtxn)?.remove(&words))
|
Ok(self.synonyms(rtxn)?.remove(&words))
|
||||||
}
|
}
|
||||||
@ -550,13 +611,13 @@ impl Index {
|
|||||||
/* words prefixes fst */
|
/* words prefixes fst */
|
||||||
|
|
||||||
/// Writes the FST which is the words prefixes dictionnary of the engine.
|
/// Writes the FST which is the words prefixes dictionnary of the engine.
|
||||||
pub fn put_words_prefixes_fst<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
|
pub(crate) fn put_words_prefixes_fst<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, ByteSlice>(wtxn, WORDS_PREFIXES_FST_KEY, fst.as_fst().as_bytes())
|
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_PREFIXES_FST_KEY, fst.as_fst().as_bytes())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the FST which is the words prefixes dictionnary of the engine.
|
/// Returns the FST which is the words prefixes dictionnary of the engine.
|
||||||
pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<fst::Set<Cow<'t, [u8]>>> {
|
pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
|
||||||
match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_PREFIXES_FST_KEY)? {
|
match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? {
|
||||||
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
|
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
|
||||||
None => Ok(fst::Set::default().map_data(Cow::Owned)?),
|
None => Ok(fst::Set::default().map_data(Cow::Owned)?),
|
||||||
}
|
}
|
||||||
@ -577,13 +638,13 @@ impl Index {
|
|||||||
&self,
|
&self,
|
||||||
rtxn: &'t RoTxn,
|
rtxn: &'t RoTxn,
|
||||||
ids: impl IntoIterator<Item=DocumentId>,
|
ids: impl IntoIterator<Item=DocumentId>,
|
||||||
) -> anyhow::Result<Vec<(DocumentId, obkv::KvReader<'t>)>>
|
) -> Result<Vec<(DocumentId, obkv::KvReader<'t>)>>
|
||||||
{
|
{
|
||||||
let mut documents = Vec::new();
|
let mut documents = Vec::new();
|
||||||
|
|
||||||
for id in ids {
|
for id in ids {
|
||||||
let kv = self.documents.get(rtxn, &BEU32::new(id))?
|
let kv = self.documents.get(rtxn, &BEU32::new(id))?
|
||||||
.with_context(|| format!("Could not find document {}", id))?;
|
.ok_or_else(|| UserError::UnknownInternalDocumentId { document_id: id })?;
|
||||||
documents.push((id, kv));
|
documents.push((id, kv));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -594,7 +655,7 @@ impl Index {
|
|||||||
pub fn all_documents<'t>(
|
pub fn all_documents<'t>(
|
||||||
&self,
|
&self,
|
||||||
rtxn: &'t RoTxn,
|
rtxn: &'t RoTxn,
|
||||||
) -> anyhow::Result<impl Iterator<Item = heed::Result<(DocumentId, obkv::KvReader<'t>)>>> {
|
) -> Result<impl Iterator<Item = heed::Result<(DocumentId, obkv::KvReader<'t>)>>> {
|
||||||
Ok(self
|
Ok(self
|
||||||
.documents
|
.documents
|
||||||
.iter(rtxn)?
|
.iter(rtxn)?
|
||||||
@ -611,23 +672,27 @@ impl Index {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the index creation time.
|
/// Returns the index creation time.
|
||||||
pub fn created_at(&self, rtxn: &RoTxn) -> heed::Result<DateTime<Utc>> {
|
pub fn created_at(&self, rtxn: &RoTxn) -> Result<DateTime<Utc>> {
|
||||||
let time = self.main
|
Ok(self.main
|
||||||
.get::<_, Str, SerdeJson<DateTime<Utc>>>(rtxn, CREATED_AT_KEY)?
|
.get::<_, Str, SerdeJson<DateTime<Utc>>>(rtxn, main_key::CREATED_AT_KEY)?
|
||||||
.expect("Index without creation time");
|
.ok_or(InternalError::DatabaseMissingEntry {
|
||||||
Ok(time)
|
db_name: db_name::MAIN,
|
||||||
|
key: Some(main_key::CREATED_AT_KEY),
|
||||||
|
})?)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the index last updated time.
|
/// Returns the index last updated time.
|
||||||
pub fn updated_at(&self, rtxn: &RoTxn) -> heed::Result<DateTime<Utc>> {
|
pub fn updated_at(&self, rtxn: &RoTxn) -> Result<DateTime<Utc>> {
|
||||||
let time = self.main
|
Ok(self.main
|
||||||
.get::<_, Str, SerdeJson<DateTime<Utc>>>(rtxn, UPDATED_AT_KEY)?
|
.get::<_, Str, SerdeJson<DateTime<Utc>>>(rtxn, main_key::UPDATED_AT_KEY)?
|
||||||
.expect("Index without update time");
|
.ok_or(InternalError::DatabaseMissingEntry {
|
||||||
Ok(time)
|
db_name: db_name::MAIN,
|
||||||
|
key: Some(main_key::UPDATED_AT_KEY),
|
||||||
|
})?)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn set_updated_at(&self, wtxn: &mut RwTxn, time: &DateTime<Utc>) -> heed::Result<()> {
|
pub(crate) fn set_updated_at(&self, wtxn: &mut RwTxn, time: &DateTime<Utc>) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, SerdeJson<DateTime<Utc>>>(wtxn, UPDATED_AT_KEY, &time)
|
self.main.put::<_, Str, SerdeJson<DateTime<Utc>>>(wtxn, main_key::UPDATED_AT_KEY, &time)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#[macro_use] extern crate pest_derive;
|
#[macro_use] extern crate pest_derive;
|
||||||
|
|
||||||
mod criterion;
|
mod criterion;
|
||||||
|
mod error;
|
||||||
mod external_documents_ids;
|
mod external_documents_ids;
|
||||||
mod fields_ids_map;
|
mod fields_ids_map;
|
||||||
mod search;
|
mod search;
|
||||||
@ -14,12 +15,13 @@ pub mod update;
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::hash::BuildHasherDefault;
|
use std::hash::BuildHasherDefault;
|
||||||
|
use std::result::Result as StdResult;
|
||||||
|
|
||||||
use anyhow::Context;
|
|
||||||
use fxhash::{FxHasher32, FxHasher64};
|
use fxhash::{FxHasher32, FxHasher64};
|
||||||
use serde_json::{Map, Value};
|
use serde_json::{Map, Value};
|
||||||
|
|
||||||
pub use self::criterion::{Criterion, default_criteria};
|
pub use self::criterion::{Criterion, default_criteria};
|
||||||
|
pub use self::error::Error;
|
||||||
pub use self::external_documents_ids::ExternalDocumentsIds;
|
pub use self::external_documents_ids::ExternalDocumentsIds;
|
||||||
pub use self::fields_ids_map::FieldsIdsMap;
|
pub use self::fields_ids_map::FieldsIdsMap;
|
||||||
pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, StrLevelPositionCodec, ObkvCodec, FieldIdWordCountCodec};
|
pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, StrLevelPositionCodec, ObkvCodec, FieldIdWordCountCodec};
|
||||||
@ -29,6 +31,8 @@ pub use self::index::Index;
|
|||||||
pub use self::search::{Search, FacetDistribution, FilterCondition, SearchResult, MatchingWords};
|
pub use self::search::{Search, FacetDistribution, FilterCondition, SearchResult, MatchingWords};
|
||||||
pub use self::tree_level::TreeLevel;
|
pub use self::tree_level::TreeLevel;
|
||||||
|
|
||||||
|
pub type Result<T> = std::result::Result<T, error::Error>;
|
||||||
|
|
||||||
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
||||||
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
|
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
|
||||||
pub type SmallString32 = smallstr::SmallString<[u8; 32]>;
|
pub type SmallString32 = smallstr::SmallString<[u8; 32]>;
|
||||||
@ -43,21 +47,24 @@ pub type FieldId = u8;
|
|||||||
pub type Position = u32;
|
pub type Position = u32;
|
||||||
pub type FieldsDistribution = HashMap<String, u64>;
|
pub type FieldsDistribution = HashMap<String, u64>;
|
||||||
|
|
||||||
type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> anyhow::Result<Vec<u8>>;
|
type MergeFn<E> = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Vec<u8>, E>;
|
||||||
|
|
||||||
/// Transform a raw obkv store into a JSON Object.
|
/// Transform a raw obkv store into a JSON Object.
|
||||||
pub fn obkv_to_json(
|
pub fn obkv_to_json(
|
||||||
displayed_fields: &[FieldId],
|
displayed_fields: &[FieldId],
|
||||||
fields_ids_map: &FieldsIdsMap,
|
fields_ids_map: &FieldsIdsMap,
|
||||||
obkv: obkv::KvReader,
|
obkv: obkv::KvReader,
|
||||||
) -> anyhow::Result<Map<String, Value>>
|
) -> Result<Map<String, Value>>
|
||||||
{
|
{
|
||||||
displayed_fields.iter()
|
displayed_fields.iter()
|
||||||
.copied()
|
.copied()
|
||||||
.flat_map(|id| obkv.get(id).map(|value| (id, value)))
|
.flat_map(|id| obkv.get(id).map(|value| (id, value)))
|
||||||
.map(|(id, value)| {
|
.map(|(id, value)| {
|
||||||
let name = fields_ids_map.name(id).context("unknown obkv field id")?;
|
let name = fields_ids_map.name(id).ok_or(error::FieldIdMapMissingEntry::FieldId {
|
||||||
let value = serde_json::from_slice(value)?;
|
field_id: id,
|
||||||
|
process: "obkv_to_json",
|
||||||
|
})?;
|
||||||
|
let value = serde_json::from_slice(value).map_err(error::InternalError::SerdeJson)?;
|
||||||
Ok((name.to_owned(), value))
|
Ok((name.to_owned(), value))
|
||||||
})
|
})
|
||||||
.collect()
|
.collect()
|
||||||
|
@ -1,15 +1,15 @@
|
|||||||
use std::mem::take;
|
use std::mem::take;
|
||||||
|
|
||||||
use anyhow::Context;
|
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use ordered_float::OrderedFloat;
|
use ordered_float::OrderedFloat;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::error::FieldIdMapMissingEntry;
|
||||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
||||||
use crate::search::facet::FacetIter;
|
use crate::search::facet::FacetIter;
|
||||||
use crate::search::query_tree::Operation;
|
use crate::search::query_tree::Operation;
|
||||||
use crate::{FieldId, Index};
|
use crate::{FieldId, Index, Result};
|
||||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||||
|
|
||||||
/// Threshold on the number of candidates that will make
|
/// Threshold on the number of candidates that will make
|
||||||
@ -36,7 +36,7 @@ impl<'t> AscDesc<'t> {
|
|||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t heed::RoTxn,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
field_name: String,
|
field_name: String,
|
||||||
) -> anyhow::Result<Self> {
|
) -> Result<Self> {
|
||||||
Self::new(index, rtxn, parent, field_name, true)
|
Self::new(index, rtxn, parent, field_name, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -45,7 +45,7 @@ impl<'t> AscDesc<'t> {
|
|||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t heed::RoTxn,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
field_name: String,
|
field_name: String,
|
||||||
) -> anyhow::Result<Self> {
|
) -> Result<Self> {
|
||||||
Self::new(index, rtxn, parent, field_name, false)
|
Self::new(index, rtxn, parent, field_name, false)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -55,11 +55,14 @@ impl<'t> AscDesc<'t> {
|
|||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
field_name: String,
|
field_name: String,
|
||||||
ascending: bool,
|
ascending: bool,
|
||||||
) -> anyhow::Result<Self> {
|
) -> Result<Self> {
|
||||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
let field_id = fields_ids_map
|
let field_id = fields_ids_map
|
||||||
.id(&field_name)
|
.id(&field_name)
|
||||||
.with_context(|| format!("field {:?} isn't registered", field_name))?;
|
.ok_or_else(|| FieldIdMapMissingEntry::FieldName {
|
||||||
|
field_name: field_name.clone(),
|
||||||
|
process: "AscDesc::new",
|
||||||
|
})?;
|
||||||
|
|
||||||
Ok(AscDesc {
|
Ok(AscDesc {
|
||||||
index,
|
index,
|
||||||
@ -79,7 +82,7 @@ impl<'t> AscDesc<'t> {
|
|||||||
|
|
||||||
impl<'t> Criterion for AscDesc<'t> {
|
impl<'t> Criterion for AscDesc<'t> {
|
||||||
#[logging_timer::time("AscDesc::{}")]
|
#[logging_timer::time("AscDesc::{}")]
|
||||||
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
|
||||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||||
self.allowed_candidates -= params.excluded_candidates;
|
self.allowed_candidates -= params.excluded_candidates;
|
||||||
|
|
||||||
@ -162,7 +165,7 @@ fn facet_ordered<'t>(
|
|||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
ascending: bool,
|
ascending: bool,
|
||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
) -> anyhow::Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
|
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
|
||||||
if candidates.len() <= CANDIDATES_THRESHOLD {
|
if candidates.len() <= CANDIDATES_THRESHOLD {
|
||||||
let iter = iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?;
|
let iter = iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?;
|
||||||
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
|
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
|
||||||
@ -186,7 +189,7 @@ fn iterative_facet_ordered_iter<'t>(
|
|||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
ascending: bool,
|
ascending: bool,
|
||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
) -> anyhow::Result<impl Iterator<Item = RoaringBitmap> + 't> {
|
) -> Result<impl Iterator<Item = RoaringBitmap> + 't> {
|
||||||
let mut docids_values = Vec::with_capacity(candidates.len() as usize);
|
let mut docids_values = Vec::with_capacity(candidates.len() as usize);
|
||||||
for docid in candidates.iter() {
|
for docid in candidates.iter() {
|
||||||
let left = (field_id, docid, f64::MIN);
|
let left = (field_id, docid, f64::MIN);
|
||||||
|
@ -5,7 +5,7 @@ use std::mem::take;
|
|||||||
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::{TreeLevel, search::build_dfa};
|
use crate::{TreeLevel, Result, search::build_dfa};
|
||||||
use crate::search::criteria::Query;
|
use crate::search::criteria::Query;
|
||||||
use crate::search::query_tree::{Operation, QueryKind};
|
use crate::search::query_tree::{Operation, QueryKind};
|
||||||
use crate::search::{word_derivations, WordDerivationsCache};
|
use crate::search::{word_derivations, WordDerivationsCache};
|
||||||
@ -48,7 +48,7 @@ impl<'t> Attribute<'t> {
|
|||||||
|
|
||||||
impl<'t> Criterion for Attribute<'t> {
|
impl<'t> Criterion for Attribute<'t> {
|
||||||
#[logging_timer::time("Attribute::{}")]
|
#[logging_timer::time("Attribute::{}")]
|
||||||
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
|
||||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||||
if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
|
if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
|
||||||
*allowed_candidates -= params.excluded_candidates;
|
*allowed_candidates -= params.excluded_candidates;
|
||||||
@ -224,7 +224,12 @@ struct QueryLevelIterator<'t, 'q> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'t, 'q> QueryLevelIterator<'t, 'q> {
|
impl<'t, 'q> QueryLevelIterator<'t, 'q> {
|
||||||
fn new(ctx: &'t dyn Context<'t>, queries: &'q [Query], wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<Self>> {
|
fn new(
|
||||||
|
ctx: &'t dyn Context<'t>,
|
||||||
|
queries: &'q [Query],
|
||||||
|
wdcache: &mut WordDerivationsCache,
|
||||||
|
) -> Result<Option<Self>>
|
||||||
|
{
|
||||||
let mut inner = Vec::with_capacity(queries.len());
|
let mut inner = Vec::with_capacity(queries.len());
|
||||||
for query in queries {
|
for query in queries {
|
||||||
match &query.kind {
|
match &query.kind {
|
||||||
@ -471,7 +476,7 @@ fn initialize_query_level_iterators<'t, 'q>(
|
|||||||
branches: &'q FlattenedQueryTree,
|
branches: &'q FlattenedQueryTree,
|
||||||
allowed_candidates: &RoaringBitmap,
|
allowed_candidates: &RoaringBitmap,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<BinaryHeap<Branch<'t, 'q>>> {
|
) -> Result<BinaryHeap<Branch<'t, 'q>>> {
|
||||||
|
|
||||||
let mut positions = BinaryHeap::with_capacity(branches.len());
|
let mut positions = BinaryHeap::with_capacity(branches.len());
|
||||||
for branch in branches {
|
for branch in branches {
|
||||||
@ -521,7 +526,7 @@ fn set_compute_candidates<'t>(
|
|||||||
branches: &FlattenedQueryTree,
|
branches: &FlattenedQueryTree,
|
||||||
allowed_candidates: &RoaringBitmap,
|
allowed_candidates: &RoaringBitmap,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<Option<RoaringBitmap>>
|
) -> Result<Option<RoaringBitmap>>
|
||||||
{
|
{
|
||||||
let mut branches_heap = initialize_query_level_iterators(ctx, branches, allowed_candidates, wdcache)?;
|
let mut branches_heap = initialize_query_level_iterators(ctx, branches, allowed_candidates, wdcache)?;
|
||||||
let lowest_level = TreeLevel::min_value();
|
let lowest_level = TreeLevel::min_value();
|
||||||
@ -573,7 +578,7 @@ fn linear_compute_candidates(
|
|||||||
ctx: &dyn Context,
|
ctx: &dyn Context,
|
||||||
branches: &FlattenedQueryTree,
|
branches: &FlattenedQueryTree,
|
||||||
allowed_candidates: &RoaringBitmap,
|
allowed_candidates: &RoaringBitmap,
|
||||||
) -> anyhow::Result<BTreeMap<u64, RoaringBitmap>>
|
) -> Result<BTreeMap<u64, RoaringBitmap>>
|
||||||
{
|
{
|
||||||
fn compute_candidate_rank(branches: &FlattenedQueryTree, words_positions: HashMap<String, RoaringBitmap>) -> u64 {
|
fn compute_candidate_rank(branches: &FlattenedQueryTree, words_positions: HashMap<String, RoaringBitmap>) -> u64 {
|
||||||
let mut min_rank = u64::max_value();
|
let mut min_rank = u64::max_value();
|
||||||
|
@ -14,7 +14,7 @@ use crate::search::criteria::{
|
|||||||
CriterionResult,
|
CriterionResult,
|
||||||
resolve_query_tree,
|
resolve_query_tree,
|
||||||
};
|
};
|
||||||
use crate::TreeLevel;
|
use crate::{TreeLevel, Result};
|
||||||
|
|
||||||
pub struct Exactness<'t> {
|
pub struct Exactness<'t> {
|
||||||
ctx: &'t dyn Context<'t>,
|
ctx: &'t dyn Context<'t>,
|
||||||
@ -45,7 +45,7 @@ impl<'t> Exactness<'t> {
|
|||||||
|
|
||||||
impl<'t> Criterion for Exactness<'t> {
|
impl<'t> Criterion for Exactness<'t> {
|
||||||
#[logging_timer::time("Exactness::{}")]
|
#[logging_timer::time("Exactness::{}")]
|
||||||
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
|
||||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||||
if let Some(state) = self.state.as_mut() {
|
if let Some(state) = self.state.as_mut() {
|
||||||
state.difference_with(params.excluded_candidates);
|
state.difference_with(params.excluded_candidates);
|
||||||
@ -158,7 +158,7 @@ fn resolve_state(
|
|||||||
ctx: &dyn Context,
|
ctx: &dyn Context,
|
||||||
state: State,
|
state: State,
|
||||||
query: &[ExactQueryPart],
|
query: &[ExactQueryPart],
|
||||||
) -> anyhow::Result<(RoaringBitmap, Option<State>)>
|
) -> Result<(RoaringBitmap, Option<State>)>
|
||||||
{
|
{
|
||||||
use State::*;
|
use State::*;
|
||||||
match state {
|
match state {
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
use log::debug;
|
use log::debug;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::Result;
|
||||||
use crate::search::query_tree::Operation;
|
use crate::search::query_tree::Operation;
|
||||||
use crate::search::WordDerivationsCache;
|
use crate::search::WordDerivationsCache;
|
||||||
use super::{resolve_query_tree, Criterion, CriterionResult, CriterionParameters, Context};
|
use super::{resolve_query_tree, Criterion, CriterionResult, CriterionParameters, Context};
|
||||||
@ -29,7 +30,7 @@ impl<'t> Final<'t> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[logging_timer::time("Final::{}")]
|
#[logging_timer::time("Final::{}")]
|
||||||
pub fn next(&mut self, excluded_candidates: &RoaringBitmap) -> anyhow::Result<Option<FinalResult>> {
|
pub fn next(&mut self, excluded_candidates: &RoaringBitmap) -> Result<Option<FinalResult>> {
|
||||||
debug!("Final iteration");
|
debug!("Final iteration");
|
||||||
let excluded_candidates = &self.returned_candidates | excluded_candidates;
|
let excluded_candidates = &self.returned_candidates | excluded_candidates;
|
||||||
let mut criterion_parameters = CriterionParameters {
|
let mut criterion_parameters = CriterionParameters {
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::Result;
|
||||||
use crate::search::query_tree::Operation;
|
use crate::search::query_tree::Operation;
|
||||||
|
|
||||||
use super::{Criterion, CriterionResult, CriterionParameters};
|
use super::{Criterion, CriterionResult, CriterionParameters};
|
||||||
|
|
||||||
pub struct Initial {
|
pub struct Initial {
|
||||||
@ -22,7 +22,7 @@ impl Initial {
|
|||||||
|
|
||||||
impl Criterion for Initial {
|
impl Criterion for Initial {
|
||||||
#[logging_timer::time("Initial::{}")]
|
#[logging_timer::time("Initial::{}")]
|
||||||
fn next(&mut self, _: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
fn next(&mut self, _: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
|
||||||
Ok(self.answer.take())
|
Ok(self.answer.take())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,7 @@ use std::borrow::Cow;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::{FieldId, TreeLevel, search::{word_derivations, WordDerivationsCache}};
|
use crate::{FieldId, TreeLevel, search::{word_derivations, WordDerivationsCache}};
|
||||||
use crate::{Index, DocumentId};
|
use crate::{Index, DocumentId, Result};
|
||||||
|
|
||||||
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
|
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
|
||||||
use self::asc_desc::AscDesc;
|
use self::asc_desc::AscDesc;
|
||||||
@ -26,7 +26,7 @@ mod words;
|
|||||||
pub mod r#final;
|
pub mod r#final;
|
||||||
|
|
||||||
pub trait Criterion {
|
pub trait Criterion {
|
||||||
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>>;
|
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The result of a call to the parent criterion.
|
/// The result of a call to the parent criterion.
|
||||||
@ -76,10 +76,11 @@ pub trait Context<'c> {
|
|||||||
fn word_position_iterator(&self, word: &str, level: TreeLevel, in_prefix_cache: bool, left: Option<u32>, right: Option<u32>) -> heed::Result<Box<dyn Iterator<Item =heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c>>;
|
fn word_position_iterator(&self, word: &str, level: TreeLevel, in_prefix_cache: bool, left: Option<u32>, right: Option<u32>) -> heed::Result<Box<dyn Iterator<Item =heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c>>;
|
||||||
fn word_position_last_level(&self, word: &str, in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>>;
|
fn word_position_last_level(&self, word: &str, in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>>;
|
||||||
fn synonyms(&self, word: &str) -> heed::Result<Option<Vec<Vec<String>>>>;
|
fn synonyms(&self, word: &str) -> heed::Result<Option<Vec<Vec<String>>>>;
|
||||||
fn searchable_fields_ids(&self) -> heed::Result<Vec<FieldId>>;
|
fn searchable_fields_ids(&self) -> Result<Vec<FieldId>>;
|
||||||
fn field_id_word_count_docids(&self, field_id: FieldId, word_count: u8) -> heed::Result<Option<RoaringBitmap>>;
|
fn field_id_word_count_docids(&self, field_id: FieldId, word_count: u8) -> heed::Result<Option<RoaringBitmap>>;
|
||||||
fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> Result<Option<RoaringBitmap>, heed::Error>;
|
fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> heed::Result<Option<RoaringBitmap>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct CriteriaBuilder<'t> {
|
pub struct CriteriaBuilder<'t> {
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
index: &'t Index,
|
index: &'t Index,
|
||||||
@ -173,7 +174,7 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
|
|||||||
self.index.words_synonyms(self.rtxn, &[word])
|
self.index.words_synonyms(self.rtxn, &[word])
|
||||||
}
|
}
|
||||||
|
|
||||||
fn searchable_fields_ids(&self) -> heed::Result<Vec<FieldId>> {
|
fn searchable_fields_ids(&self) -> Result<Vec<FieldId>> {
|
||||||
match self.index.searchable_fields_ids(self.rtxn)? {
|
match self.index.searchable_fields_ids(self.rtxn)? {
|
||||||
Some(searchable_fields_ids) => Ok(searchable_fields_ids),
|
Some(searchable_fields_ids) => Ok(searchable_fields_ids),
|
||||||
None => Ok(self.index.fields_ids_map(self.rtxn)?.ids().collect()),
|
None => Ok(self.index.fields_ids_map(self.rtxn)?.ids().collect()),
|
||||||
@ -185,14 +186,14 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
|
|||||||
self.index.field_id_word_count_docids.get(self.rtxn, &key)
|
self.index.field_id_word_count_docids.get(self.rtxn, &key)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> Result<Option<RoaringBitmap>, heed::Error> {
|
fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
let key = (word, level, left, right);
|
let key = (word, level, left, right);
|
||||||
self.index.word_level_position_docids.get(self.rtxn, &key)
|
self.index.word_level_position_docids.get(self.rtxn, &key)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t> CriteriaBuilder<'t> {
|
impl<'t> CriteriaBuilder<'t> {
|
||||||
pub fn new(rtxn: &'t heed::RoTxn<'t>, index: &'t Index) -> anyhow::Result<Self> {
|
pub fn new(rtxn: &'t heed::RoTxn<'t>, index: &'t Index) -> Result<Self> {
|
||||||
let words_fst = index.words_fst(rtxn)?;
|
let words_fst = index.words_fst(rtxn)?;
|
||||||
let words_prefixes_fst = index.words_prefixes_fst(rtxn)?;
|
let words_prefixes_fst = index.words_prefixes_fst(rtxn)?;
|
||||||
Ok(Self { rtxn, index, words_fst, words_prefixes_fst })
|
Ok(Self { rtxn, index, words_fst, words_prefixes_fst })
|
||||||
@ -203,7 +204,7 @@ impl<'t> CriteriaBuilder<'t> {
|
|||||||
query_tree: Option<Operation>,
|
query_tree: Option<Operation>,
|
||||||
primitive_query: Option<Vec<PrimitiveQueryPart>>,
|
primitive_query: Option<Vec<PrimitiveQueryPart>>,
|
||||||
filtered_candidates: Option<RoaringBitmap>,
|
filtered_candidates: Option<RoaringBitmap>,
|
||||||
) -> anyhow::Result<Final<'t>>
|
) -> Result<Final<'t>>
|
||||||
{
|
{
|
||||||
use crate::criterion::Criterion as Name;
|
use crate::criterion::Criterion as Name;
|
||||||
|
|
||||||
@ -230,13 +231,13 @@ pub fn resolve_query_tree<'t>(
|
|||||||
ctx: &'t dyn Context,
|
ctx: &'t dyn Context,
|
||||||
query_tree: &Operation,
|
query_tree: &Operation,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
fn resolve_operation<'t>(
|
fn resolve_operation<'t>(
|
||||||
ctx: &'t dyn Context,
|
ctx: &'t dyn Context,
|
||||||
query_tree: &Operation,
|
query_tree: &Operation,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
use Operation::{And, Phrase, Or, Query};
|
use Operation::{And, Phrase, Or, Query};
|
||||||
|
|
||||||
@ -244,7 +245,7 @@ pub fn resolve_query_tree<'t>(
|
|||||||
And(ops) => {
|
And(ops) => {
|
||||||
let mut ops = ops.iter().map(|op| {
|
let mut ops = ops.iter().map(|op| {
|
||||||
resolve_operation(ctx, op, wdcache)
|
resolve_operation(ctx, op, wdcache)
|
||||||
}).collect::<anyhow::Result<Vec<_>>>()?;
|
}).collect::<Result<Vec<_>>>()?;
|
||||||
|
|
||||||
ops.sort_unstable_by_key(|cds| cds.len());
|
ops.sort_unstable_by_key(|cds| cds.len());
|
||||||
|
|
||||||
@ -302,7 +303,7 @@ fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>(
|
|||||||
left_words: &[(T, u8)],
|
left_words: &[(T, u8)],
|
||||||
right_words: &[(U, u8)],
|
right_words: &[(U, u8)],
|
||||||
proximity: u8
|
proximity: u8
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
for (left, _l_typo) in left_words {
|
for (left, _l_typo) in left_words {
|
||||||
@ -318,7 +319,7 @@ fn query_docids(
|
|||||||
ctx: &dyn Context,
|
ctx: &dyn Context,
|
||||||
query: &Query,
|
query: &Query,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
match &query.kind {
|
match &query.kind {
|
||||||
QueryKind::Exact { word, .. } => {
|
QueryKind::Exact { word, .. } => {
|
||||||
@ -354,7 +355,7 @@ fn query_pair_proximity_docids(
|
|||||||
right: &Query,
|
right: &Query,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
if proximity >= 8 {
|
if proximity >= 8 {
|
||||||
let mut candidates = query_docids(ctx, left, wdcache)?;
|
let mut candidates = query_docids(ctx, left, wdcache)?;
|
||||||
@ -477,11 +478,11 @@ pub mod test {
|
|||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn searchable_fields_ids(&self) -> heed::Result<Vec<FieldId>> {
|
fn searchable_fields_ids(&self) -> Result<Vec<FieldId>> {
|
||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn word_level_position_docids(&self, _word: &str, _level: TreeLevel, _left: u32, _right: u32) -> Result<Option<RoaringBitmap>, heed::Error> {
|
fn word_level_position_docids(&self, _word: &str, _level: TreeLevel, _left: u32, _right: u32) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,9 +5,10 @@ use std::mem::take;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
|
|
||||||
use crate::{DocumentId, Position, search::{query_tree::QueryKind}};
|
|
||||||
use crate::search::query_tree::{maximum_proximity, Operation, Query};
|
use crate::search::query_tree::{maximum_proximity, Operation, Query};
|
||||||
use crate::search::{build_dfa, WordDerivationsCache};
|
use crate::search::{build_dfa, WordDerivationsCache};
|
||||||
|
use crate::search::{query_tree::QueryKind};
|
||||||
|
use crate::{DocumentId, Position, Result};
|
||||||
use super::{
|
use super::{
|
||||||
Context,
|
Context,
|
||||||
Criterion,
|
Criterion,
|
||||||
@ -55,7 +56,7 @@ impl<'t> Proximity<'t> {
|
|||||||
|
|
||||||
impl<'t> Criterion for Proximity<'t> {
|
impl<'t> Criterion for Proximity<'t> {
|
||||||
#[logging_timer::time("Proximity::{}")]
|
#[logging_timer::time("Proximity::{}")]
|
||||||
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
|
||||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||||
if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
|
if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
|
||||||
*allowed_candidates -= params.excluded_candidates;
|
*allowed_candidates -= params.excluded_candidates;
|
||||||
@ -161,7 +162,7 @@ fn resolve_candidates<'t>(
|
|||||||
proximity: u8,
|
proximity: u8,
|
||||||
cache: &mut Cache,
|
cache: &mut Cache,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
fn resolve_operation<'t>(
|
fn resolve_operation<'t>(
|
||||||
ctx: &'t dyn Context,
|
ctx: &'t dyn Context,
|
||||||
@ -169,7 +170,7 @@ fn resolve_candidates<'t>(
|
|||||||
proximity: u8,
|
proximity: u8,
|
||||||
cache: &mut Cache,
|
cache: &mut Cache,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<Vec<(Query, Query, RoaringBitmap)>>
|
) -> Result<Vec<(Query, Query, RoaringBitmap)>>
|
||||||
{
|
{
|
||||||
use Operation::{And, Phrase, Or};
|
use Operation::{And, Phrase, Or};
|
||||||
|
|
||||||
@ -227,7 +228,7 @@ fn resolve_candidates<'t>(
|
|||||||
proximity: u8,
|
proximity: u8,
|
||||||
cache: &mut Cache,
|
cache: &mut Cache,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<Vec<(Query, Query, RoaringBitmap)>>
|
) -> Result<Vec<(Query, Query, RoaringBitmap)>>
|
||||||
{
|
{
|
||||||
fn pair_combinations(mana: u8, left_max: u8) -> impl Iterator<Item = (u8, u8)> {
|
fn pair_combinations(mana: u8, left_max: u8) -> impl Iterator<Item = (u8, u8)> {
|
||||||
(0..=mana.min(left_max)).map(move |m| (m, mana - m))
|
(0..=mana.min(left_max)).map(move |m| (m, mana - m))
|
||||||
@ -281,7 +282,7 @@ fn resolve_candidates<'t>(
|
|||||||
proximity: u8,
|
proximity: u8,
|
||||||
cache: &mut Cache,
|
cache: &mut Cache,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<Vec<(Query, Query, RoaringBitmap)>>
|
) -> Result<Vec<(Query, Query, RoaringBitmap)>>
|
||||||
{
|
{
|
||||||
// Extract the first two elements but gives the tail
|
// Extract the first two elements but gives the tail
|
||||||
// that is just after the first element.
|
// that is just after the first element.
|
||||||
@ -324,13 +325,13 @@ fn resolve_plane_sweep_candidates(
|
|||||||
query_tree: &Operation,
|
query_tree: &Operation,
|
||||||
allowed_candidates: &RoaringBitmap,
|
allowed_candidates: &RoaringBitmap,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<BTreeMap<u8, RoaringBitmap>>
|
) -> Result<BTreeMap<u8, RoaringBitmap>>
|
||||||
{
|
{
|
||||||
/// FIXME may be buggy with query like "new new york"
|
/// FIXME may be buggy with query like "new new york"
|
||||||
fn plane_sweep(
|
fn plane_sweep(
|
||||||
groups_positions: Vec<Vec<(Position, u8, Position)>>,
|
groups_positions: Vec<Vec<(Position, u8, Position)>>,
|
||||||
consecutive: bool,
|
consecutive: bool,
|
||||||
) -> anyhow::Result<Vec<(Position, u8, Position)>>
|
) -> Result<Vec<(Position, u8, Position)>>
|
||||||
{
|
{
|
||||||
fn compute_groups_proximity(
|
fn compute_groups_proximity(
|
||||||
groups: &[(usize, (Position, u8, Position))],
|
groups: &[(usize, (Position, u8, Position))],
|
||||||
@ -451,7 +452,7 @@ fn resolve_plane_sweep_candidates(
|
|||||||
rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>,
|
rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>,
|
||||||
words_positions: &HashMap<String, RoaringBitmap>,
|
words_positions: &HashMap<String, RoaringBitmap>,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<Vec<(Position, u8, Position)>>
|
) -> Result<Vec<(Position, u8, Position)>>
|
||||||
{
|
{
|
||||||
use Operation::{And, Phrase, Or};
|
use Operation::{And, Phrase, Or};
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind};
|
use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind};
|
||||||
use crate::search::{word_derivations, WordDerivationsCache};
|
use crate::search::{word_derivations, WordDerivationsCache};
|
||||||
|
use crate::Result;
|
||||||
use super::{
|
use super::{
|
||||||
Candidates,
|
Candidates,
|
||||||
Context,
|
Context,
|
||||||
@ -43,7 +44,7 @@ impl<'t> Typo<'t> {
|
|||||||
|
|
||||||
impl<'t> Criterion for Typo<'t> {
|
impl<'t> Criterion for Typo<'t> {
|
||||||
#[logging_timer::time("Typo::{}")]
|
#[logging_timer::time("Typo::{}")]
|
||||||
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
|
||||||
use Candidates::{Allowed, Forbidden};
|
use Candidates::{Allowed, Forbidden};
|
||||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||||
match self.state.as_mut() {
|
match self.state.as_mut() {
|
||||||
@ -163,14 +164,14 @@ fn alterate_query_tree(
|
|||||||
mut query_tree: Operation,
|
mut query_tree: Operation,
|
||||||
number_typos: u8,
|
number_typos: u8,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<Operation>
|
) -> Result<Operation>
|
||||||
{
|
{
|
||||||
fn recurse(
|
fn recurse(
|
||||||
words_fst: &fst::Set<Cow<[u8]>>,
|
words_fst: &fst::Set<Cow<[u8]>>,
|
||||||
operation: &mut Operation,
|
operation: &mut Operation,
|
||||||
number_typos: u8,
|
number_typos: u8,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
{
|
{
|
||||||
use Operation::{And, Phrase, Or};
|
use Operation::{And, Phrase, Or};
|
||||||
|
|
||||||
@ -218,7 +219,7 @@ fn resolve_candidates<'t>(
|
|||||||
number_typos: u8,
|
number_typos: u8,
|
||||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
fn resolve_operation<'t>(
|
fn resolve_operation<'t>(
|
||||||
ctx: &'t dyn Context,
|
ctx: &'t dyn Context,
|
||||||
@ -226,7 +227,7 @@ fn resolve_candidates<'t>(
|
|||||||
number_typos: u8,
|
number_typos: u8,
|
||||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
use Operation::{And, Phrase, Or, Query};
|
use Operation::{And, Phrase, Or, Query};
|
||||||
|
|
||||||
@ -277,7 +278,7 @@ fn resolve_candidates<'t>(
|
|||||||
mana: u8,
|
mana: u8,
|
||||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
match branches.split_first() {
|
match branches.split_first() {
|
||||||
Some((head, [])) => {
|
Some((head, [])) => {
|
||||||
|
@ -4,6 +4,7 @@ use log::debug;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::search::query_tree::Operation;
|
use crate::search::query_tree::Operation;
|
||||||
|
use crate::Result;
|
||||||
use super::{Context, Criterion, CriterionParameters, CriterionResult, resolve_query_tree};
|
use super::{Context, Criterion, CriterionParameters, CriterionResult, resolve_query_tree};
|
||||||
|
|
||||||
pub struct Words<'t> {
|
pub struct Words<'t> {
|
||||||
@ -30,7 +31,7 @@ impl<'t> Words<'t> {
|
|||||||
|
|
||||||
impl<'t> Criterion for Words<'t> {
|
impl<'t> Criterion for Words<'t> {
|
||||||
#[logging_timer::time("Words::{}")]
|
#[logging_timer::time("Words::{}")]
|
||||||
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
|
||||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||||
if let Some(candidates) = self.candidates.as_mut() {
|
if let Some(candidates) = self.candidates.as_mut() {
|
||||||
*candidates -= params.excluded_candidates;
|
*candidates -= params.excluded_candidates;
|
||||||
|
@ -3,9 +3,11 @@ use std::mem::size_of;
|
|||||||
use heed::types::ByteSlice;
|
use heed::types::ByteSlice;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{Distinct, DocIter};
|
use crate::error::InternalError;
|
||||||
use crate::heed_codec::facet::*;
|
use crate::heed_codec::facet::*;
|
||||||
use crate::{DocumentId, FieldId, Index};
|
use crate::index::db_name;
|
||||||
|
use crate::{DocumentId, FieldId, Index, Result};
|
||||||
|
use super::{Distinct, DocIter};
|
||||||
|
|
||||||
const FID_SIZE: usize = size_of::<FieldId>();
|
const FID_SIZE: usize = size_of::<FieldId>();
|
||||||
const DOCID_SIZE: usize = size_of::<DocumentId>();
|
const DOCID_SIZE: usize = size_of::<DocumentId>();
|
||||||
@ -57,14 +59,17 @@ impl<'a> FacetDistinctIter<'a> {
|
|||||||
.get(self.txn, &(self.distinct, 0, key, key))
|
.get(self.txn, &(self.distinct, 0, key, key))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn distinct_string(&mut self, id: DocumentId) -> anyhow::Result<()> {
|
fn distinct_string(&mut self, id: DocumentId) -> Result<()> {
|
||||||
let iter = facet_string_values(id, self.distinct, self.index, self.txn)?;
|
let iter = facet_string_values(id, self.distinct, self.index, self.txn)?;
|
||||||
|
|
||||||
for item in iter {
|
for item in iter {
|
||||||
let ((_, _, value), _) = item?;
|
let ((_, _, value), _) = item?;
|
||||||
let facet_docids = self
|
let facet_docids = self
|
||||||
.facet_string_docids(value)?
|
.facet_string_docids(value)?
|
||||||
.expect("Corrupted data: Facet values must exist");
|
.ok_or(InternalError::DatabaseMissingEntry {
|
||||||
|
db_name: db_name::FACET_ID_STRING_DOCIDS,
|
||||||
|
key: None,
|
||||||
|
})?;
|
||||||
self.excluded.union_with(&facet_docids);
|
self.excluded.union_with(&facet_docids);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -73,14 +78,17 @@ impl<'a> FacetDistinctIter<'a> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn distinct_number(&mut self, id: DocumentId) -> anyhow::Result<()> {
|
fn distinct_number(&mut self, id: DocumentId) -> Result<()> {
|
||||||
let iter = facet_number_values(id, self.distinct, self.index, self.txn)?;
|
let iter = facet_number_values(id, self.distinct, self.index, self.txn)?;
|
||||||
|
|
||||||
for item in iter {
|
for item in iter {
|
||||||
let ((_, _, value), _) = item?;
|
let ((_, _, value), _) = item?;
|
||||||
let facet_docids = self
|
let facet_docids = self
|
||||||
.facet_number_docids(value)?
|
.facet_number_docids(value)?
|
||||||
.expect("Corrupted data: Facet values must exist");
|
.ok_or(InternalError::DatabaseMissingEntry {
|
||||||
|
db_name: db_name::FACET_ID_F64_DOCIDS,
|
||||||
|
key: None,
|
||||||
|
})?;
|
||||||
self.excluded.union_with(&facet_docids);
|
self.excluded.union_with(&facet_docids);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -92,7 +100,7 @@ impl<'a> FacetDistinctIter<'a> {
|
|||||||
/// Performs the next iteration of the facet distinct. This is a convenience method that is
|
/// Performs the next iteration of the facet distinct. This is a convenience method that is
|
||||||
/// called by the Iterator::next implementation that transposes the result. It makes error
|
/// called by the Iterator::next implementation that transposes the result. It makes error
|
||||||
/// handling easier.
|
/// handling easier.
|
||||||
fn next_inner(&mut self) -> anyhow::Result<Option<DocumentId>> {
|
fn next_inner(&mut self) -> Result<Option<DocumentId>> {
|
||||||
// The first step is to remove all the excluded documents from our candidates
|
// The first step is to remove all the excluded documents from our candidates
|
||||||
self.candidates.difference_with(&self.excluded);
|
self.candidates.difference_with(&self.excluded);
|
||||||
|
|
||||||
@ -129,7 +137,7 @@ fn facet_number_values<'a>(
|
|||||||
distinct: FieldId,
|
distinct: FieldId,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'a heed::RoTxn,
|
txn: &'a heed::RoTxn,
|
||||||
) -> anyhow::Result<heed::RoPrefix<'a, FieldDocIdFacetF64Codec, heed::types::Unit>> {
|
) -> Result<heed::RoPrefix<'a, FieldDocIdFacetF64Codec, heed::types::Unit>> {
|
||||||
let key = facet_values_prefix_key(distinct, id);
|
let key = facet_values_prefix_key(distinct, id);
|
||||||
|
|
||||||
let iter = index
|
let iter = index
|
||||||
@ -146,7 +154,7 @@ fn facet_string_values<'a>(
|
|||||||
distinct: FieldId,
|
distinct: FieldId,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'a heed::RoTxn,
|
txn: &'a heed::RoTxn,
|
||||||
) -> anyhow::Result<heed::RoPrefix<'a, FieldDocIdFacetStringCodec, heed::types::Unit>> {
|
) -> Result<heed::RoPrefix<'a, FieldDocIdFacetStringCodec, heed::types::Unit>> {
|
||||||
let key = facet_values_prefix_key(distinct, id);
|
let key = facet_values_prefix_key(distinct, id);
|
||||||
|
|
||||||
let iter = index
|
let iter = index
|
||||||
@ -159,7 +167,7 @@ fn facet_string_values<'a>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for FacetDistinctIter<'_> {
|
impl Iterator for FacetDistinctIter<'_> {
|
||||||
type Item = anyhow::Result<DocumentId>;
|
type Item = Result<DocumentId>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
self.next_inner().transpose()
|
self.next_inner().transpose()
|
||||||
|
@ -3,13 +3,13 @@ mod noop_distinct;
|
|||||||
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::DocumentId;
|
use crate::{DocumentId, Result};
|
||||||
pub use facet_distinct::FacetDistinct;
|
pub use facet_distinct::FacetDistinct;
|
||||||
pub use noop_distinct::NoopDistinct;
|
pub use noop_distinct::NoopDistinct;
|
||||||
|
|
||||||
/// A trait implemented by document interators that are returned by calls to `Distinct::distinct`.
|
/// A trait implemented by document interators that are returned by calls to `Distinct::distinct`.
|
||||||
/// It provides a way to get back the ownership to the excluded set.
|
/// It provides a way to get back the ownership to the excluded set.
|
||||||
pub trait DocIter: Iterator<Item = anyhow::Result<DocumentId>> {
|
pub trait DocIter: Iterator<Item = Result<DocumentId>> {
|
||||||
/// Returns ownership on the internal exluded set.
|
/// Returns ownership on the internal exluded set.
|
||||||
fn into_excluded(self) -> RoaringBitmap;
|
fn into_excluded(self) -> RoaringBitmap;
|
||||||
}
|
}
|
||||||
@ -106,7 +106,7 @@ mod test {
|
|||||||
|
|
||||||
/// Checks that all the candidates are distinct, and returns the candidates number.
|
/// Checks that all the candidates are distinct, and returns the candidates number.
|
||||||
pub(crate) fn validate_distinct_candidates(
|
pub(crate) fn validate_distinct_candidates(
|
||||||
candidates: impl Iterator<Item=anyhow::Result<DocumentId>>,
|
candidates: impl Iterator<Item = crate::Result<DocumentId>>,
|
||||||
distinct: FieldId,
|
distinct: FieldId,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
) -> usize {
|
) -> usize {
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
use roaring::{RoaringBitmap, bitmap::IntoIter};
|
use roaring::{RoaringBitmap, bitmap::IntoIter};
|
||||||
|
|
||||||
use crate::DocumentId;
|
use crate::{DocumentId, Result};
|
||||||
use super::{DocIter, Distinct};
|
use super::{DocIter, Distinct};
|
||||||
|
|
||||||
/// A distinct implementer that does not perform any distinct,
|
/// A distinct implementer that does not perform any distinct,
|
||||||
@ -13,7 +13,7 @@ pub struct NoopDistinctIter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for NoopDistinctIter {
|
impl Iterator for NoopDistinctIter {
|
||||||
type Item = anyhow::Result<DocumentId>;
|
type Item = Result<DocumentId>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
self.candidates.next().map(Ok)
|
self.candidates.next().map(Ok)
|
||||||
|
@ -2,15 +2,15 @@ use std::collections::{HashSet, BTreeMap};
|
|||||||
use std::ops::Bound::Unbounded;
|
use std::ops::Bound::Unbounded;
|
||||||
use std::{cmp, fmt};
|
use std::{cmp, fmt};
|
||||||
|
|
||||||
use anyhow::Context;
|
|
||||||
use heed::{Database, BytesDecode};
|
use heed::{Database, BytesDecode};
|
||||||
use heed::types::{ByteSlice, Unit};
|
use heed::types::{ByteSlice, Unit};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::error::FieldIdMapMissingEntry;
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::FacetValueStringCodec;
|
use crate::heed_codec::facet::FacetValueStringCodec;
|
||||||
use crate::search::facet::{FacetIter, FacetRange};
|
use crate::search::facet::{FacetIter, FacetRange};
|
||||||
use crate::{Index, FieldId, DocumentId};
|
use crate::{Index, FieldId, DocumentId, Result};
|
||||||
|
|
||||||
/// The default number of values by facets that will
|
/// The default number of values by facets that will
|
||||||
/// be fetched from the key-value store.
|
/// be fetched from the key-value store.
|
||||||
@ -195,14 +195,15 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(&self) -> anyhow::Result<BTreeMap<String, BTreeMap<String, u64>>> {
|
pub fn execute(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
|
||||||
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
||||||
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
|
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
|
||||||
|
|
||||||
let mut distribution = BTreeMap::new();
|
let mut distribution = BTreeMap::new();
|
||||||
for name in filterable_fields {
|
for name in filterable_fields {
|
||||||
let fid = fields_ids_map.id(&name).with_context(|| {
|
let fid = fields_ids_map.id(&name).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
|
||||||
format!("missing field name {:?} from the fields id map", name)
|
field_name: name.clone(),
|
||||||
|
process: "FacetDistribution::execute",
|
||||||
})?;
|
})?;
|
||||||
let values = self.facet_values(fid)?;
|
let values = self.facet_values(fid)?;
|
||||||
distribution.insert(name, values);
|
distribution.insert(name, values);
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::ops::Bound::{self, Included, Excluded};
|
use std::ops::Bound::{self, Included, Excluded};
|
||||||
|
use std::result::Result as StdResult;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
use either::Either;
|
use either::Either;
|
||||||
@ -11,8 +12,9 @@ use pest::iterators::{Pair, Pairs};
|
|||||||
use pest::Parser;
|
use pest::Parser;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::error::UserError;
|
||||||
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
|
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
|
||||||
use crate::{Index, FieldId, FieldsIdsMap, CboRoaringBitmapCodec};
|
use crate::{Index, FieldId, FieldsIdsMap, CboRoaringBitmapCodec, Result};
|
||||||
|
|
||||||
use super::FacetRange;
|
use super::FacetRange;
|
||||||
use super::parser::Rule;
|
use super::parser::Rule;
|
||||||
@ -60,7 +62,7 @@ impl FilterCondition {
|
|||||||
rtxn: &heed::RoTxn,
|
rtxn: &heed::RoTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
array: I,
|
array: I,
|
||||||
) -> anyhow::Result<Option<FilterCondition>>
|
) -> Result<Option<FilterCondition>>
|
||||||
where I: IntoIterator<Item=Either<J, B>>,
|
where I: IntoIterator<Item=Either<J, B>>,
|
||||||
J: IntoIterator<Item=A>,
|
J: IntoIterator<Item=A>,
|
||||||
A: AsRef<str>,
|
A: AsRef<str>,
|
||||||
@ -104,11 +106,11 @@ impl FilterCondition {
|
|||||||
rtxn: &heed::RoTxn,
|
rtxn: &heed::RoTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
expression: &str,
|
expression: &str,
|
||||||
) -> anyhow::Result<FilterCondition>
|
) -> Result<FilterCondition>
|
||||||
{
|
{
|
||||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
let filterable_fields = index.filterable_fields_ids(rtxn)?;
|
let filterable_fields = index.filterable_fields_ids(rtxn)?;
|
||||||
let lexed = FilterParser::parse(Rule::prgm, expression)?;
|
let lexed = FilterParser::parse(Rule::prgm, expression).map_err(UserError::InvalidFilter)?;
|
||||||
FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed)
|
FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -116,7 +118,7 @@ impl FilterCondition {
|
|||||||
fim: &FieldsIdsMap,
|
fim: &FieldsIdsMap,
|
||||||
ff: &HashSet<FieldId>,
|
ff: &HashSet<FieldId>,
|
||||||
expression: Pairs<Rule>,
|
expression: Pairs<Rule>,
|
||||||
) -> anyhow::Result<Self>
|
) -> Result<Self>
|
||||||
{
|
{
|
||||||
PREC_CLIMBER.climb(
|
PREC_CLIMBER.climb(
|
||||||
expression,
|
expression,
|
||||||
@ -133,7 +135,7 @@ impl FilterCondition {
|
|||||||
Rule::term => Self::from_pairs(fim, ff, pair.into_inner()),
|
Rule::term => Self::from_pairs(fim, ff, pair.into_inner()),
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
},
|
},
|
||||||
|lhs: anyhow::Result<Self>, op: Pair<Rule>, rhs: anyhow::Result<Self>| {
|
|lhs: Result<Self>, op: Pair<Rule>, rhs: Result<Self>| {
|
||||||
match op.as_rule() {
|
match op.as_rule() {
|
||||||
Rule::or => Ok(Or(Box::new(lhs?), Box::new(rhs?))),
|
Rule::or => Ok(Or(Box::new(lhs?), Box::new(rhs?))),
|
||||||
Rule::and => Ok(And(Box::new(lhs?), Box::new(rhs?))),
|
Rule::and => Ok(And(Box::new(lhs?), Box::new(rhs?))),
|
||||||
@ -158,16 +160,17 @@ impl FilterCondition {
|
|||||||
fields_ids_map: &FieldsIdsMap,
|
fields_ids_map: &FieldsIdsMap,
|
||||||
filterable_fields: &HashSet<FieldId>,
|
filterable_fields: &HashSet<FieldId>,
|
||||||
item: Pair<Rule>,
|
item: Pair<Rule>,
|
||||||
) -> anyhow::Result<FilterCondition>
|
) -> Result<FilterCondition>
|
||||||
{
|
{
|
||||||
let mut items = item.into_inner();
|
let mut items = item.into_inner();
|
||||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
|
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
|
||||||
|
.map_err(UserError::InvalidFilterAttribute)?;
|
||||||
|
|
||||||
let (lresult, _) = pest_parse(items.next().unwrap());
|
let (lresult, _) = pest_parse(items.next().unwrap());
|
||||||
let (rresult, _) = pest_parse(items.next().unwrap());
|
let (rresult, _) = pest_parse(items.next().unwrap());
|
||||||
|
|
||||||
let lvalue = lresult?;
|
let lvalue = lresult.map_err(UserError::InvalidFilter)?;
|
||||||
let rvalue = rresult?;
|
let rvalue = rresult.map_err(UserError::InvalidFilter)?;
|
||||||
|
|
||||||
Ok(Operator(fid, Between(lvalue, rvalue)))
|
Ok(Operator(fid, Between(lvalue, rvalue)))
|
||||||
}
|
}
|
||||||
@ -176,10 +179,11 @@ impl FilterCondition {
|
|||||||
fields_ids_map: &FieldsIdsMap,
|
fields_ids_map: &FieldsIdsMap,
|
||||||
filterable_fields: &HashSet<FieldId>,
|
filterable_fields: &HashSet<FieldId>,
|
||||||
item: Pair<Rule>,
|
item: Pair<Rule>,
|
||||||
) -> anyhow::Result<FilterCondition>
|
) -> Result<FilterCondition>
|
||||||
{
|
{
|
||||||
let mut items = item.into_inner();
|
let mut items = item.into_inner();
|
||||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
|
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
|
||||||
|
.map_err(UserError::InvalidFilterAttribute)?;
|
||||||
|
|
||||||
let value = items.next().unwrap();
|
let value = items.next().unwrap();
|
||||||
let (result, svalue) = pest_parse(value);
|
let (result, svalue) = pest_parse(value);
|
||||||
@ -192,60 +196,68 @@ impl FilterCondition {
|
|||||||
fields_ids_map: &FieldsIdsMap,
|
fields_ids_map: &FieldsIdsMap,
|
||||||
filterable_fields: &HashSet<FieldId>,
|
filterable_fields: &HashSet<FieldId>,
|
||||||
item: Pair<Rule>,
|
item: Pair<Rule>,
|
||||||
) -> anyhow::Result<FilterCondition>
|
) -> Result<FilterCondition>
|
||||||
{
|
{
|
||||||
let mut items = item.into_inner();
|
let mut items = item.into_inner();
|
||||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
|
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
|
||||||
|
.map_err(UserError::InvalidFilterAttribute)?;
|
||||||
|
|
||||||
let value = items.next().unwrap();
|
let value = items.next().unwrap();
|
||||||
let (result, _svalue) = pest_parse(value);
|
let (result, _svalue) = pest_parse(value);
|
||||||
|
let value = result.map_err(UserError::InvalidFilter)?;
|
||||||
|
|
||||||
Ok(Operator(fid, GreaterThan(result?)))
|
Ok(Operator(fid, GreaterThan(value)))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn greater_than_or_equal(
|
fn greater_than_or_equal(
|
||||||
fields_ids_map: &FieldsIdsMap,
|
fields_ids_map: &FieldsIdsMap,
|
||||||
filterable_fields: &HashSet<FieldId>,
|
filterable_fields: &HashSet<FieldId>,
|
||||||
item: Pair<Rule>,
|
item: Pair<Rule>,
|
||||||
) -> anyhow::Result<FilterCondition>
|
) -> Result<FilterCondition>
|
||||||
{
|
{
|
||||||
let mut items = item.into_inner();
|
let mut items = item.into_inner();
|
||||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
|
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
|
||||||
|
.map_err(UserError::InvalidFilterAttribute)?;
|
||||||
|
|
||||||
let value = items.next().unwrap();
|
let value = items.next().unwrap();
|
||||||
let (result, _svalue) = pest_parse(value);
|
let (result, _svalue) = pest_parse(value);
|
||||||
|
let value = result.map_err(UserError::InvalidFilter)?;
|
||||||
|
|
||||||
Ok(Operator(fid, GreaterThanOrEqual(result?)))
|
Ok(Operator(fid, GreaterThanOrEqual(value)))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn lower_than(
|
fn lower_than(
|
||||||
fields_ids_map: &FieldsIdsMap,
|
fields_ids_map: &FieldsIdsMap,
|
||||||
filterable_fields: &HashSet<FieldId>,
|
filterable_fields: &HashSet<FieldId>,
|
||||||
item: Pair<Rule>,
|
item: Pair<Rule>,
|
||||||
) -> anyhow::Result<FilterCondition>
|
) -> Result<FilterCondition>
|
||||||
{
|
{
|
||||||
let mut items = item.into_inner();
|
let mut items = item.into_inner();
|
||||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
|
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
|
||||||
|
.map_err(UserError::InvalidFilterAttribute)?;
|
||||||
|
|
||||||
let value = items.next().unwrap();
|
let value = items.next().unwrap();
|
||||||
let (result, _svalue) = pest_parse(value);
|
let (result, _svalue) = pest_parse(value);
|
||||||
|
let value = result.map_err(UserError::InvalidFilter)?;
|
||||||
|
|
||||||
Ok(Operator(fid, LowerThan(result?)))
|
Ok(Operator(fid, LowerThan(value)))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn lower_than_or_equal(
|
fn lower_than_or_equal(
|
||||||
fields_ids_map: &FieldsIdsMap,
|
fields_ids_map: &FieldsIdsMap,
|
||||||
filterable_fields: &HashSet<FieldId>,
|
filterable_fields: &HashSet<FieldId>,
|
||||||
item: Pair<Rule>,
|
item: Pair<Rule>,
|
||||||
) -> anyhow::Result<FilterCondition>
|
) -> Result<FilterCondition>
|
||||||
{
|
{
|
||||||
let mut items = item.into_inner();
|
let mut items = item.into_inner();
|
||||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
|
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
|
||||||
|
.map_err(UserError::InvalidFilterAttribute)?;
|
||||||
|
|
||||||
let value = items.next().unwrap();
|
let value = items.next().unwrap();
|
||||||
let (result, _svalue) = pest_parse(value);
|
let (result, _svalue) = pest_parse(value);
|
||||||
|
let value = result.map_err(UserError::InvalidFilter)?;
|
||||||
|
|
||||||
Ok(Operator(fid, LowerThanOrEqual(result?)))
|
Ok(Operator(fid, LowerThanOrEqual(value)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -260,7 +272,7 @@ impl FilterCondition {
|
|||||||
left: Bound<f64>,
|
left: Bound<f64>,
|
||||||
right: Bound<f64>,
|
right: Bound<f64>,
|
||||||
output: &mut RoaringBitmap,
|
output: &mut RoaringBitmap,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
{
|
{
|
||||||
match (left, right) {
|
match (left, right) {
|
||||||
// If the request is an exact value we must go directly to the deepest level.
|
// If the request is an exact value we must go directly to the deepest level.
|
||||||
@ -332,7 +344,7 @@ impl FilterCondition {
|
|||||||
strings_db: heed::Database<FacetValueStringCodec, CboRoaringBitmapCodec>,
|
strings_db: heed::Database<FacetValueStringCodec, CboRoaringBitmapCodec>,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
operator: &Operator,
|
operator: &Operator,
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
// Make sure we always bound the ranges with the field id and the level,
|
// Make sure we always bound the ranges with the field id and the level,
|
||||||
// as the facets values are all in the same database and prefixed by the
|
// as the facets values are all in the same database and prefixed by the
|
||||||
@ -390,7 +402,7 @@ impl FilterCondition {
|
|||||||
&self,
|
&self,
|
||||||
rtxn: &heed::RoTxn,
|
rtxn: &heed::RoTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
let numbers_db = index.facet_id_f64_docids;
|
let numbers_db = index.facet_id_f64_docids;
|
||||||
let strings_db = index.facet_id_string_docids;
|
let strings_db = index.facet_id_string_docids;
|
||||||
@ -422,7 +434,7 @@ fn field_id(
|
|||||||
fields_ids_map: &FieldsIdsMap,
|
fields_ids_map: &FieldsIdsMap,
|
||||||
filterable_fields: &HashSet<FieldId>,
|
filterable_fields: &HashSet<FieldId>,
|
||||||
items: &mut Pairs<Rule>,
|
items: &mut Pairs<Rule>,
|
||||||
) -> Result<FieldId, PestError<Rule>>
|
) -> StdResult<FieldId, PestError<Rule>>
|
||||||
{
|
{
|
||||||
// lexing ensures that we at least have a key
|
// lexing ensures that we at least have a key
|
||||||
let key = items.next().unwrap();
|
let key = items.next().unwrap();
|
||||||
@ -463,7 +475,7 @@ fn field_id(
|
|||||||
/// the original string that we tried to parse.
|
/// the original string that we tried to parse.
|
||||||
///
|
///
|
||||||
/// Returns the parsing error associated with the span if the conversion fails.
|
/// Returns the parsing error associated with the span if the conversion fails.
|
||||||
fn pest_parse<T>(pair: Pair<Rule>) -> (Result<T, pest::error::Error<Rule>>, String)
|
fn pest_parse<T>(pair: Pair<Rule>) -> (StdResult<T, pest::error::Error<Rule>>, String)
|
||||||
where T: FromStr,
|
where T: FromStr,
|
||||||
T::Err: ToString,
|
T::Err: ToString,
|
||||||
{
|
{
|
||||||
|
@ -9,8 +9,9 @@ use crate::heed_codec::CboRoaringBitmapCodec;
|
|||||||
use crate::heed_codec::facet::FacetLevelValueF64Codec;
|
use crate::heed_codec::facet::FacetLevelValueF64Codec;
|
||||||
use crate::{Index, FieldId};
|
use crate::{Index, FieldId};
|
||||||
|
|
||||||
pub use self::filter_condition::{FilterCondition, Operator};
|
|
||||||
pub use self::facet_distribution::FacetDistribution;
|
pub use self::facet_distribution::FacetDistribution;
|
||||||
|
pub use self::filter_condition::{FilterCondition, Operator};
|
||||||
|
pub(crate) use self::parser::Rule as ParserRule;
|
||||||
|
|
||||||
mod filter_condition;
|
mod filter_condition;
|
||||||
mod facet_distribution;
|
mod facet_distribution;
|
||||||
|
@ -2,6 +2,7 @@ use std::borrow::Cow;
|
|||||||
use std::collections::hash_map::{Entry, HashMap};
|
use std::collections::hash_map::{Entry, HashMap};
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::mem::take;
|
use std::mem::take;
|
||||||
|
use std::result::Result as StdResult;
|
||||||
use std::str::Utf8Error;
|
use std::str::Utf8Error;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
@ -12,14 +13,17 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
|
|||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use roaring::bitmap::RoaringBitmap;
|
use roaring::bitmap::RoaringBitmap;
|
||||||
|
|
||||||
use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct};
|
use crate::error::FieldIdMapMissingEntry;
|
||||||
use crate::search::criteria::r#final::{Final, FinalResult};
|
use crate::search::criteria::r#final::{Final, FinalResult};
|
||||||
use crate::{Index, DocumentId};
|
use crate::{Index, DocumentId, Result};
|
||||||
|
|
||||||
pub use self::facet::{FilterCondition, FacetDistribution, FacetIter, Operator};
|
pub use self::facet::{FilterCondition, FacetDistribution, FacetIter, Operator};
|
||||||
pub use self::matching_words::MatchingWords;
|
pub use self::matching_words::MatchingWords;
|
||||||
|
pub(crate) use self::facet::ParserRule;
|
||||||
use self::query_tree::QueryTreeBuilder;
|
use self::query_tree::QueryTreeBuilder;
|
||||||
|
|
||||||
|
use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct};
|
||||||
|
|
||||||
// Building these factories is not free.
|
// Building these factories is not free.
|
||||||
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
|
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
|
||||||
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
|
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
|
||||||
@ -93,7 +97,7 @@ impl<'a> Search<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(&self) -> anyhow::Result<SearchResult> {
|
pub fn execute(&self) -> Result<SearchResult> {
|
||||||
// We create the query tree by spliting the query into tokens.
|
// We create the query tree by spliting the query into tokens.
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
let (query_tree, primitive_query) = match self.query.as_ref() {
|
let (query_tree, primitive_query) = match self.query.as_ref() {
|
||||||
@ -140,7 +144,10 @@ impl<'a> Search<'a> {
|
|||||||
None => self.perform_sort(NoopDistinct, matching_words, criteria),
|
None => self.perform_sort(NoopDistinct, matching_words, criteria),
|
||||||
Some(name) => {
|
Some(name) => {
|
||||||
let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
||||||
let id = field_ids_map.id(name).expect("distinct not present in field map");
|
let id = field_ids_map.id(name).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
|
||||||
|
field_name: name.to_string(),
|
||||||
|
process: "distinct attribute",
|
||||||
|
})?;
|
||||||
let distinct = FacetDistinct::new(id, self.index, self.rtxn);
|
let distinct = FacetDistinct::new(id, self.index, self.rtxn);
|
||||||
self.perform_sort(distinct, matching_words, criteria)
|
self.perform_sort(distinct, matching_words, criteria)
|
||||||
}
|
}
|
||||||
@ -152,7 +159,7 @@ impl<'a> Search<'a> {
|
|||||||
mut distinct: D,
|
mut distinct: D,
|
||||||
matching_words: MatchingWords,
|
matching_words: MatchingWords,
|
||||||
mut criteria: Final,
|
mut criteria: Final,
|
||||||
) -> anyhow::Result<SearchResult>
|
) -> Result<SearchResult>
|
||||||
{
|
{
|
||||||
let mut offset = self.offset;
|
let mut offset = self.offset;
|
||||||
let mut initial_candidates = RoaringBitmap::new();
|
let mut initial_candidates = RoaringBitmap::new();
|
||||||
@ -225,7 +232,7 @@ pub fn word_derivations<'c>(
|
|||||||
max_typo: u8,
|
max_typo: u8,
|
||||||
fst: &fst::Set<Cow<[u8]>>,
|
fst: &fst::Set<Cow<[u8]>>,
|
||||||
cache: &'c mut WordDerivationsCache,
|
cache: &'c mut WordDerivationsCache,
|
||||||
) -> Result<&'c [(String, u8)], Utf8Error> {
|
) -> StdResult<&'c [(String, u8)], Utf8Error> {
|
||||||
match cache.entry((word.to_string(), is_prefix, max_typo)) {
|
match cache.entry((word.to_string(), is_prefix, max_typo)) {
|
||||||
Entry::Occupied(entry) => Ok(entry.into_mut()),
|
Entry::Occupied(entry) => Ok(entry.into_mut()),
|
||||||
Entry::Vacant(entry) => {
|
Entry::Vacant(entry) => {
|
||||||
|
@ -7,7 +7,7 @@ use meilisearch_tokenizer::TokenKind;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
||||||
use crate::Index;
|
use crate::{Index, Result};
|
||||||
|
|
||||||
type IsOptionalWord = bool;
|
type IsOptionalWord = bool;
|
||||||
type IsPrefix = bool;
|
type IsPrefix = bool;
|
||||||
@ -219,7 +219,7 @@ impl<'a> QueryTreeBuilder<'a> {
|
|||||||
/// - if `authorize_typos` is set to `false` the query tree will be generated
|
/// - if `authorize_typos` is set to `false` the query tree will be generated
|
||||||
/// forcing all query words to match documents without any typo
|
/// forcing all query words to match documents without any typo
|
||||||
/// (the criterion `typo` will be ignored)
|
/// (the criterion `typo` will be ignored)
|
||||||
pub fn build(&self, query: TokenStream) -> anyhow::Result<Option<(Operation, PrimitiveQuery)>> {
|
pub fn build(&self, query: TokenStream) -> Result<Option<(Operation, PrimitiveQuery)>> {
|
||||||
let stop_words = self.index.stop_words(self.rtxn)?;
|
let stop_words = self.index.stop_words(self.rtxn)?;
|
||||||
let primitive_query = create_primitive_query(query, stop_words, self.words_limit);
|
let primitive_query = create_primitive_query(query, stop_words, self.words_limit);
|
||||||
if !primitive_query.is_empty() {
|
if !primitive_query.is_empty() {
|
||||||
@ -291,14 +291,14 @@ fn create_query_tree(
|
|||||||
optional_words: bool,
|
optional_words: bool,
|
||||||
authorize_typos: bool,
|
authorize_typos: bool,
|
||||||
query: &[PrimitiveQueryPart],
|
query: &[PrimitiveQueryPart],
|
||||||
) -> anyhow::Result<Operation>
|
) -> Result<Operation>
|
||||||
{
|
{
|
||||||
/// Matches on the `PrimitiveQueryPart` and create an operation from it.
|
/// Matches on the `PrimitiveQueryPart` and create an operation from it.
|
||||||
fn resolve_primitive_part(
|
fn resolve_primitive_part(
|
||||||
ctx: &impl Context,
|
ctx: &impl Context,
|
||||||
authorize_typos: bool,
|
authorize_typos: bool,
|
||||||
part: PrimitiveQueryPart,
|
part: PrimitiveQueryPart,
|
||||||
) -> anyhow::Result<Operation>
|
) -> Result<Operation>
|
||||||
{
|
{
|
||||||
match part {
|
match part {
|
||||||
// 1. try to split word in 2
|
// 1. try to split word in 2
|
||||||
@ -325,7 +325,7 @@ fn create_query_tree(
|
|||||||
ctx: &impl Context,
|
ctx: &impl Context,
|
||||||
authorize_typos: bool,
|
authorize_typos: bool,
|
||||||
query: &[PrimitiveQueryPart],
|
query: &[PrimitiveQueryPart],
|
||||||
) -> anyhow::Result<Operation>
|
) -> Result<Operation>
|
||||||
{
|
{
|
||||||
const MAX_NGRAM: usize = 3;
|
const MAX_NGRAM: usize = 3;
|
||||||
let mut op_children = Vec::new();
|
let mut op_children = Vec::new();
|
||||||
@ -379,7 +379,7 @@ fn create_query_tree(
|
|||||||
ctx: &impl Context,
|
ctx: &impl Context,
|
||||||
authorize_typos: bool,
|
authorize_typos: bool,
|
||||||
query: PrimitiveQuery,
|
query: PrimitiveQuery,
|
||||||
) -> anyhow::Result<Operation>
|
) -> Result<Operation>
|
||||||
{
|
{
|
||||||
let number_phrases = query.iter().filter(|p| p.is_phrase()).count();
|
let number_phrases = query.iter().filter(|p| p.is_phrase()).count();
|
||||||
let mut operation_children = Vec::new();
|
let mut operation_children = Vec::new();
|
||||||
@ -532,7 +532,7 @@ mod test {
|
|||||||
authorize_typos: bool,
|
authorize_typos: bool,
|
||||||
words_limit: Option<usize>,
|
words_limit: Option<usize>,
|
||||||
query: TokenStream,
|
query: TokenStream,
|
||||||
) -> anyhow::Result<Option<(Operation, PrimitiveQuery)>>
|
) -> Result<Option<(Operation, PrimitiveQuery)>>
|
||||||
{
|
{
|
||||||
let primitive_query = create_primitive_query(query, None, words_limit);
|
let primitive_query = create_primitive_query(query, None, words_limit);
|
||||||
if !primitive_query.is_empty() {
|
if !primitive_query.is_empty() {
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use crate::{ExternalDocumentsIds, Index, FieldsDistribution};
|
|
||||||
|
use crate::{ExternalDocumentsIds, Index, FieldsDistribution, Result};
|
||||||
|
|
||||||
pub struct ClearDocuments<'t, 'u, 'i> {
|
pub struct ClearDocuments<'t, 'u, 'i> {
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
@ -18,7 +19,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||||||
ClearDocuments { wtxn, index, _update_id: update_id }
|
ClearDocuments { wtxn, index, _update_id: update_id }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(self) -> anyhow::Result<u64> {
|
pub fn execute(self) -> Result<u64> {
|
||||||
self.index.set_updated_at(self.wtxn, &Utc::now())?;
|
self.index.set_updated_at(self.wtxn, &Utc::now())?;
|
||||||
let Index {
|
let Index {
|
||||||
env: _env,
|
env: _env,
|
||||||
|
@ -1,15 +1,16 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::collections::hash_map::Entry;
|
use std::collections::hash_map::Entry;
|
||||||
|
|
||||||
use anyhow::{anyhow, Context};
|
|
||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
use fst::IntoStreamer;
|
use fst::IntoStreamer;
|
||||||
use heed::types::{ByteSlice, Unit};
|
use heed::types::{ByteSlice, Unit};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
|
use crate::error::{InternalError, FieldIdMapMissingEntry, UserError};
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
use crate::{Index, DocumentId, FieldId, BEU32, SmallString32, ExternalDocumentsIds};
|
use crate::index::{db_name, main_key};
|
||||||
|
use crate::{Index, DocumentId, FieldId, BEU32, SmallString32, ExternalDocumentsIds, Result};
|
||||||
use super::ClearDocuments;
|
use super::ClearDocuments;
|
||||||
|
|
||||||
pub struct DeleteDocuments<'t, 'u, 'i> {
|
pub struct DeleteDocuments<'t, 'u, 'i> {
|
||||||
@ -25,7 +26,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
update_id: u64,
|
update_id: u64,
|
||||||
) -> anyhow::Result<DeleteDocuments<'t, 'u, 'i>>
|
) -> Result<DeleteDocuments<'t, 'u, 'i>>
|
||||||
{
|
{
|
||||||
let external_documents_ids = index
|
let external_documents_ids = index
|
||||||
.external_documents_ids(wtxn)?
|
.external_documents_ids(wtxn)?
|
||||||
@ -54,7 +55,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
Some(docid)
|
Some(docid)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(self) -> anyhow::Result<u64> {
|
pub fn execute(self) -> Result<u64> {
|
||||||
self.index.set_updated_at(self.wtxn, &Utc::now())?;
|
self.index.set_updated_at(self.wtxn, &Utc::now())?;
|
||||||
// We retrieve the current documents ids that are in the database.
|
// We retrieve the current documents ids that are in the database.
|
||||||
let mut documents_ids = self.index.documents_ids(self.wtxn)?;
|
let mut documents_ids = self.index.documents_ids(self.wtxn)?;
|
||||||
@ -77,8 +78,18 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||||
let primary_key = self.index.primary_key(self.wtxn)?.context("missing primary key")?;
|
let primary_key = self.index.primary_key(self.wtxn)?.ok_or_else(|| {
|
||||||
let id_field = fields_ids_map.id(primary_key).expect(r#"the field "id" to be present"#);
|
InternalError::DatabaseMissingEntry {
|
||||||
|
db_name: db_name::MAIN,
|
||||||
|
key: Some(main_key::PRIMARY_KEY_KEY),
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
let id_field = fields_ids_map.id(primary_key).ok_or_else(|| {
|
||||||
|
FieldIdMapMissingEntry::FieldName {
|
||||||
|
field_name: primary_key.to_string(),
|
||||||
|
process: "DeleteDocuments::execute",
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
|
||||||
let Index {
|
let Index {
|
||||||
env: _env,
|
env: _env,
|
||||||
@ -119,7 +130,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
let external_id = match serde_json::from_slice(content).unwrap() {
|
let external_id = match serde_json::from_slice(content).unwrap() {
|
||||||
Value::String(string) => SmallString32::from(string.as_str()),
|
Value::String(string) => SmallString32::from(string.as_str()),
|
||||||
Value::Number(number) => SmallString32::from(number.to_string()),
|
Value::Number(number) => SmallString32::from(number.to_string()),
|
||||||
_ => return Err(anyhow!("documents ids must be either strings or numbers")),
|
document_id => return Err(UserError::InvalidDocumentId { document_id }.into()),
|
||||||
};
|
};
|
||||||
external_ids.push(external_id);
|
external_ids.push(external_id);
|
||||||
}
|
}
|
||||||
|
@ -9,11 +9,12 @@ use heed::{BytesEncode, Error};
|
|||||||
use log::debug;
|
use log::debug;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::error::InternalError;
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
use crate::heed_codec::facet::FacetLevelValueF64Codec;
|
use crate::heed_codec::facet::FacetLevelValueF64Codec;
|
||||||
use crate::Index;
|
|
||||||
use crate::update::index_documents::WriteMethod;
|
use crate::update::index_documents::WriteMethod;
|
||||||
use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database};
|
use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database};
|
||||||
|
use crate::{Index, Result};
|
||||||
|
|
||||||
pub struct Facets<'t, 'u, 'i> {
|
pub struct Facets<'t, 'u, 'i> {
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
@ -55,7 +56,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(self) -> anyhow::Result<()> {
|
pub fn execute(self) -> Result<()> {
|
||||||
self.index.set_updated_at(self.wtxn, &Utc::now())?;
|
self.index.set_updated_at(self.wtxn, &Utc::now())?;
|
||||||
// We get the faceted fields to be able to create the facet levels.
|
// We get the faceted fields to be able to create the facet levels.
|
||||||
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
||||||
@ -102,7 +103,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||||||
self.wtxn,
|
self.wtxn,
|
||||||
*self.index.facet_id_f64_docids.as_polymorph(),
|
*self.index.facet_id_f64_docids.as_polymorph(),
|
||||||
content,
|
content,
|
||||||
|_, _| anyhow::bail!("invalid facet number level merging"),
|
|_, _| Err(InternalError::IndexingMergingKeys { process: "facet number level" }),
|
||||||
WriteMethod::GetMergePut,
|
WriteMethod::GetMergePut,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
@ -132,7 +133,7 @@ fn compute_facet_number_levels<'t>(
|
|||||||
level_group_size: NonZeroUsize,
|
level_group_size: NonZeroUsize,
|
||||||
min_level_size: NonZeroUsize,
|
min_level_size: NonZeroUsize,
|
||||||
field_id: u8,
|
field_id: u8,
|
||||||
) -> anyhow::Result<Reader<FileFuse>>
|
) -> Result<Reader<FileFuse>>
|
||||||
{
|
{
|
||||||
let first_level_size = db
|
let first_level_size = db
|
||||||
.remap_key_type::<ByteSlice>()
|
.remap_key_type::<ByteSlice>()
|
||||||
@ -195,7 +196,7 @@ fn compute_faceted_documents_ids(
|
|||||||
rtxn: &heed::RoTxn,
|
rtxn: &heed::RoTxn,
|
||||||
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
|
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
|
||||||
field_id: u8,
|
field_id: u8,
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
let mut documents_ids = RoaringBitmap::new();
|
let mut documents_ids = RoaringBitmap::new();
|
||||||
|
|
||||||
@ -214,7 +215,7 @@ fn write_number_entry(
|
|||||||
left: f64,
|
left: f64,
|
||||||
right: f64,
|
right: f64,
|
||||||
ids: &RoaringBitmap,
|
ids: &RoaringBitmap,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
{
|
{
|
||||||
let key = (field_id, level, left, right);
|
let key = (field_id, level, left, right);
|
||||||
let key = FacetLevelValueF64Codec::bytes_encode(&key).ok_or(Error::Encoding)?;
|
let key = FacetLevelValueF64Codec::bytes_encode(&key).ok_or(Error::Encoding)?;
|
||||||
|
@ -1,75 +1,43 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
use std::result::Result as StdResult;
|
||||||
|
|
||||||
use anyhow::{bail, ensure, Context};
|
|
||||||
use bstr::ByteSlice as _;
|
|
||||||
use fst::IntoStreamer;
|
use fst::IntoStreamer;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
|
use crate::Result;
|
||||||
|
|
||||||
const WORDS_FST_KEY: &[u8] = crate::index::WORDS_FST_KEY.as_bytes();
|
/// Only the last value associated with an id is kept.
|
||||||
const FIELDS_IDS_MAP_KEY: &[u8] = crate::index::FIELDS_IDS_MAP_KEY.as_bytes();
|
pub fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> Result<Vec<u8>> {
|
||||||
const DOCUMENTS_IDS_KEY: &[u8] = crate::index::DOCUMENTS_IDS_KEY.as_bytes();
|
Ok(obkvs.last().unwrap().clone().into_owned())
|
||||||
|
|
||||||
pub fn main_merge(key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
|
||||||
match key {
|
|
||||||
WORDS_FST_KEY => {
|
|
||||||
let fsts: Vec<_> = values.iter().map(|v| fst::Set::new(v).unwrap()).collect();
|
|
||||||
|
|
||||||
// Union of the FSTs
|
|
||||||
let mut op = fst::set::OpBuilder::new();
|
|
||||||
fsts.iter().for_each(|fst| op.push(fst.into_stream()));
|
|
||||||
let op = op.r#union();
|
|
||||||
|
|
||||||
let mut build = fst::SetBuilder::memory();
|
|
||||||
build.extend_stream(op.into_stream()).unwrap();
|
|
||||||
Ok(build.into_inner().unwrap())
|
|
||||||
},
|
|
||||||
FIELDS_IDS_MAP_KEY => {
|
|
||||||
ensure!(values.windows(2).all(|vs| vs[0] == vs[1]), "fields ids map doesn't match");
|
|
||||||
Ok(values[0].to_vec())
|
|
||||||
},
|
|
||||||
DOCUMENTS_IDS_KEY => roaring_bitmap_merge(values),
|
|
||||||
otherwise => bail!("wut {:?}", otherwise),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn word_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
/// Merge all the obks in the order we see them.
|
||||||
roaring_bitmap_merge(values)
|
pub fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> Result<Vec<u8>> {
|
||||||
|
let mut iter = obkvs.iter();
|
||||||
|
let first = iter.next().map(|b| b.clone().into_owned()).unwrap();
|
||||||
|
Ok(iter.fold(first, |acc, current| {
|
||||||
|
let first = obkv::KvReader::new(&acc);
|
||||||
|
let second = obkv::KvReader::new(current);
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
merge_two_obkvs(first, second, &mut buffer);
|
||||||
|
buffer
|
||||||
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn docid_word_positions_merge(key: &[u8], _values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
// Union of multiple FSTs
|
||||||
bail!("merging docid word positions is an error ({:?})", key.as_bstr())
|
pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
|
||||||
|
let fsts = values.iter().map(fst::Set::new).collect::<StdResult<Vec<_>, _>>()?;
|
||||||
|
let op_builder: fst::set::OpBuilder = fsts.iter().map(|fst| fst.into_stream()).collect();
|
||||||
|
let op = op_builder.r#union();
|
||||||
|
|
||||||
|
let mut build = fst::SetBuilder::memory();
|
||||||
|
build.extend_stream(op.into_stream()).unwrap();
|
||||||
|
Ok(build.into_inner().unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn field_id_docid_facet_values_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
pub fn keep_first(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
|
||||||
let first = values.first().context("no value to merge")?;
|
Ok(values.first().unwrap().to_vec())
|
||||||
ensure!(values.iter().all(|v| v == first), "invalid field id docid facet value merging");
|
|
||||||
Ok(first.to_vec())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn words_pairs_proximities_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
|
||||||
cbo_roaring_bitmap_merge(values)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn word_prefix_level_positions_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
|
||||||
cbo_roaring_bitmap_merge(values)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn word_level_position_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
|
||||||
cbo_roaring_bitmap_merge(values)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn field_id_word_count_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
|
||||||
cbo_roaring_bitmap_merge(values)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn facet_field_value_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
|
||||||
cbo_roaring_bitmap_merge(values)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn documents_merge(key: &[u8], _values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
|
||||||
bail!("merging documents is an error ({:?})", key.as_bstr())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn merge_two_obkvs(base: obkv::KvReader, update: obkv::KvReader, buffer: &mut Vec<u8>) {
|
pub fn merge_two_obkvs(base: obkv::KvReader, update: obkv::KvReader, buffer: &mut Vec<u8>) {
|
||||||
@ -88,7 +56,7 @@ pub fn merge_two_obkvs(base: obkv::KvReader, update: obkv::KvReader, buffer: &mu
|
|||||||
writer.finish().unwrap();
|
writer.finish().unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn roaring_bitmap_merge(values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
|
||||||
let (head, tail) = values.split_first().unwrap();
|
let (head, tail) = values.split_first().unwrap();
|
||||||
let mut head = RoaringBitmap::deserialize_from(&head[..])?;
|
let mut head = RoaringBitmap::deserialize_from(&head[..])?;
|
||||||
|
|
||||||
@ -102,7 +70,7 @@ fn roaring_bitmap_merge(values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
|||||||
Ok(vec)
|
Ok(vec)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cbo_roaring_bitmap_merge(values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
|
||||||
let (head, tail) = values.split_first().unwrap();
|
let (head, tail) = values.split_first().unwrap();
|
||||||
let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
|
let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
|
||||||
|
|
||||||
@ -112,6 +80,6 @@ fn cbo_roaring_bitmap_merge(values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let mut vec = Vec::new();
|
let mut vec = Vec::new();
|
||||||
CboRoaringBitmapCodec::serialize_into(&head, &mut vec)?;
|
CboRoaringBitmapCodec::serialize_into(&head, &mut vec);
|
||||||
Ok(vec)
|
Ok(vec)
|
||||||
}
|
}
|
||||||
|
@ -3,11 +3,11 @@ use std::collections::HashSet;
|
|||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{self, Seek, SeekFrom, BufReader, BufRead};
|
use std::io::{self, Seek, SeekFrom, BufReader, BufRead};
|
||||||
use std::num::{NonZeroU32, NonZeroUsize};
|
use std::num::{NonZeroU32, NonZeroUsize};
|
||||||
|
use std::result::Result as StdResult;
|
||||||
use std::str;
|
use std::str;
|
||||||
use std::sync::mpsc::sync_channel;
|
use std::sync::mpsc::sync_channel;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use anyhow::Context;
|
|
||||||
use bstr::ByteSlice as _;
|
use bstr::ByteSlice as _;
|
||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
use grenad::{MergerIter, Writer, Sorter, Merger, Reader, FileFuse, CompressionType};
|
use grenad::{MergerIter, Writer, Sorter, Merger, Reader, FileFuse, CompressionType};
|
||||||
@ -18,18 +18,15 @@ use rayon::prelude::*;
|
|||||||
use rayon::ThreadPool;
|
use rayon::ThreadPool;
|
||||||
use serde::{Serialize, Deserialize};
|
use serde::{Serialize, Deserialize};
|
||||||
|
|
||||||
use crate::index::Index;
|
use crate::error::{Error, InternalError};
|
||||||
|
use crate::{Index, Result};
|
||||||
use crate::update::{
|
use crate::update::{
|
||||||
Facets, WordsLevelPositions, WordPrefixDocids, WordsPrefixesFst, UpdateIndexingStep,
|
Facets, WordsLevelPositions, WordPrefixDocids, WordsPrefixesFst, UpdateIndexingStep,
|
||||||
WordPrefixPairProximityDocids,
|
WordPrefixPairProximityDocids,
|
||||||
};
|
};
|
||||||
use self::store::{Store, Readers};
|
use self::store::{Store, Readers};
|
||||||
pub use self::merge_function::{
|
pub use self::merge_function::{
|
||||||
main_merge, word_docids_merge, words_pairs_proximities_docids_merge,
|
fst_merge, cbo_roaring_bitmap_merge, roaring_bitmap_merge, keep_first
|
||||||
docid_word_positions_merge, documents_merge,
|
|
||||||
word_level_position_docids_merge, word_prefix_level_positions_docids_merge,
|
|
||||||
facet_field_value_docids_merge, field_id_docid_facet_values_merge,
|
|
||||||
field_id_word_count_docids_merge,
|
|
||||||
};
|
};
|
||||||
pub use self::transform::{Transform, TransformOutput};
|
pub use self::transform::{Transform, TransformOutput};
|
||||||
|
|
||||||
@ -60,14 +57,14 @@ pub fn create_writer(typ: CompressionType, level: Option<u32>, file: File) -> io
|
|||||||
builder.build(file)
|
builder.build(file)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn create_sorter(
|
pub fn create_sorter<E>(
|
||||||
merge: MergeFn,
|
merge: MergeFn<E>,
|
||||||
chunk_compression_type: CompressionType,
|
chunk_compression_type: CompressionType,
|
||||||
chunk_compression_level: Option<u32>,
|
chunk_compression_level: Option<u32>,
|
||||||
chunk_fusing_shrink_size: Option<u64>,
|
chunk_fusing_shrink_size: Option<u64>,
|
||||||
max_nb_chunks: Option<usize>,
|
max_nb_chunks: Option<usize>,
|
||||||
max_memory: Option<usize>,
|
max_memory: Option<usize>,
|
||||||
) -> Sorter<MergeFn>
|
) -> Sorter<MergeFn<E>>
|
||||||
{
|
{
|
||||||
let mut builder = Sorter::builder(merge);
|
let mut builder = Sorter::builder(merge);
|
||||||
if let Some(shrink_size) = chunk_fusing_shrink_size {
|
if let Some(shrink_size) = chunk_fusing_shrink_size {
|
||||||
@ -86,7 +83,7 @@ pub fn create_sorter(
|
|||||||
builder.build()
|
builder.build()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn writer_into_reader(writer: Writer<File>, shrink_size: Option<u64>) -> anyhow::Result<Reader<FileFuse>> {
|
pub fn writer_into_reader(writer: Writer<File>, shrink_size: Option<u64>) -> Result<Reader<FileFuse>> {
|
||||||
let mut file = writer.into_inner()?;
|
let mut file = writer.into_inner()?;
|
||||||
file.seek(SeekFrom::Start(0))?;
|
file.seek(SeekFrom::Start(0))?;
|
||||||
let file = if let Some(shrink_size) = shrink_size {
|
let file = if let Some(shrink_size) = shrink_size {
|
||||||
@ -97,19 +94,25 @@ pub fn writer_into_reader(writer: Writer<File>, shrink_size: Option<u64>) -> any
|
|||||||
Reader::new(file).map_err(Into::into)
|
Reader::new(file).map_err(Into::into)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn merge_readers(sources: Vec<Reader<FileFuse>>, merge: MergeFn) -> Merger<FileFuse, MergeFn> {
|
pub fn merge_readers<E>(
|
||||||
|
sources: Vec<Reader<FileFuse>>,
|
||||||
|
merge: MergeFn<E>,
|
||||||
|
) -> Merger<FileFuse, MergeFn<E>>
|
||||||
|
{
|
||||||
let mut builder = Merger::builder(merge);
|
let mut builder = Merger::builder(merge);
|
||||||
builder.extend(sources);
|
builder.extend(sources);
|
||||||
builder.build()
|
builder.build()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn merge_into_lmdb_database(
|
pub fn merge_into_lmdb_database<E>(
|
||||||
wtxn: &mut heed::RwTxn,
|
wtxn: &mut heed::RwTxn,
|
||||||
database: heed::PolyDatabase,
|
database: heed::PolyDatabase,
|
||||||
sources: Vec<Reader<FileFuse>>,
|
sources: Vec<Reader<FileFuse>>,
|
||||||
merge: MergeFn,
|
merge: MergeFn<E>,
|
||||||
method: WriteMethod,
|
method: WriteMethod,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
|
where
|
||||||
|
Error: From<E>,
|
||||||
{
|
{
|
||||||
debug!("Merging {} MTBL stores...", sources.len());
|
debug!("Merging {} MTBL stores...", sources.len());
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
@ -127,13 +130,15 @@ pub fn merge_into_lmdb_database(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn write_into_lmdb_database(
|
pub fn write_into_lmdb_database<E>(
|
||||||
wtxn: &mut heed::RwTxn,
|
wtxn: &mut heed::RwTxn,
|
||||||
database: heed::PolyDatabase,
|
database: heed::PolyDatabase,
|
||||||
mut reader: Reader<FileFuse>,
|
mut reader: Reader<FileFuse>,
|
||||||
merge: MergeFn,
|
merge: MergeFn<E>,
|
||||||
method: WriteMethod,
|
method: WriteMethod,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
|
where
|
||||||
|
Error: From<E>,
|
||||||
{
|
{
|
||||||
debug!("Writing MTBL stores...");
|
debug!("Writing MTBL stores...");
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
@ -142,9 +147,7 @@ pub fn write_into_lmdb_database(
|
|||||||
WriteMethod::Append => {
|
WriteMethod::Append => {
|
||||||
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
|
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
|
||||||
while let Some((k, v)) = reader.next()? {
|
while let Some((k, v)) = reader.next()? {
|
||||||
out_iter.append(k, v).with_context(|| {
|
out_iter.append(k, v)?;
|
||||||
format!("writing {:?} into LMDB", k.as_bstr())
|
|
||||||
})?;
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
WriteMethod::GetMergePut => {
|
WriteMethod::GetMergePut => {
|
||||||
@ -152,7 +155,7 @@ pub fn write_into_lmdb_database(
|
|||||||
let mut iter = database.prefix_iter_mut::<_, ByteSlice, ByteSlice>(wtxn, k)?;
|
let mut iter = database.prefix_iter_mut::<_, ByteSlice, ByteSlice>(wtxn, k)?;
|
||||||
match iter.next().transpose()? {
|
match iter.next().transpose()? {
|
||||||
Some((key, old_val)) if key == k => {
|
Some((key, old_val)) if key == k => {
|
||||||
let vals = vec![Cow::Borrowed(old_val), Cow::Borrowed(v)];
|
let vals = &[Cow::Borrowed(old_val), Cow::Borrowed(v)][..];
|
||||||
let val = merge(k, &vals)?;
|
let val = merge(k, &vals)?;
|
||||||
iter.put_current(k, &val)?;
|
iter.put_current(k, &val)?;
|
||||||
},
|
},
|
||||||
@ -169,13 +172,16 @@ pub fn write_into_lmdb_database(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn sorter_into_lmdb_database(
|
pub fn sorter_into_lmdb_database<E>(
|
||||||
wtxn: &mut heed::RwTxn,
|
wtxn: &mut heed::RwTxn,
|
||||||
database: heed::PolyDatabase,
|
database: heed::PolyDatabase,
|
||||||
sorter: Sorter<MergeFn>,
|
sorter: Sorter<MergeFn<E>>,
|
||||||
merge: MergeFn,
|
merge: MergeFn<E>,
|
||||||
method: WriteMethod,
|
method: WriteMethod,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
|
where
|
||||||
|
Error: From<E>,
|
||||||
|
Error: From<grenad::Error<E>>
|
||||||
{
|
{
|
||||||
debug!("Writing MTBL sorter...");
|
debug!("Writing MTBL sorter...");
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
@ -192,21 +198,21 @@ pub fn sorter_into_lmdb_database(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn merger_iter_into_lmdb_database<R: io::Read>(
|
fn merger_iter_into_lmdb_database<R: io::Read, E>(
|
||||||
wtxn: &mut heed::RwTxn,
|
wtxn: &mut heed::RwTxn,
|
||||||
database: heed::PolyDatabase,
|
database: heed::PolyDatabase,
|
||||||
mut sorter: MergerIter<R, MergeFn>,
|
mut sorter: MergerIter<R, MergeFn<E>>,
|
||||||
merge: MergeFn,
|
merge: MergeFn<E>,
|
||||||
method: WriteMethod,
|
method: WriteMethod,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
|
where
|
||||||
|
Error: From<E>,
|
||||||
{
|
{
|
||||||
match method {
|
match method {
|
||||||
WriteMethod::Append => {
|
WriteMethod::Append => {
|
||||||
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
|
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
|
||||||
while let Some((k, v)) = sorter.next()? {
|
while let Some((k, v)) = sorter.next()? {
|
||||||
out_iter.append(k, v).with_context(|| {
|
out_iter.append(k, v)?;
|
||||||
format!("writing {:?} into LMDB", k.as_bstr())
|
|
||||||
})?;
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
WriteMethod::GetMergePut => {
|
WriteMethod::GetMergePut => {
|
||||||
@ -215,7 +221,10 @@ fn merger_iter_into_lmdb_database<R: io::Read>(
|
|||||||
match iter.next().transpose()? {
|
match iter.next().transpose()? {
|
||||||
Some((key, old_val)) if key == k => {
|
Some((key, old_val)) if key == k => {
|
||||||
let vals = vec![Cow::Borrowed(old_val), Cow::Borrowed(v)];
|
let vals = vec![Cow::Borrowed(old_val), Cow::Borrowed(v)];
|
||||||
let val = merge(k, &vals).expect("merge failed");
|
let val = merge(k, &vals).map_err(|_| {
|
||||||
|
// TODO just wrap this error?
|
||||||
|
InternalError::IndexingMergingKeys { process: "get-put-merge" }
|
||||||
|
})?;
|
||||||
iter.put_current(k, &val)?;
|
iter.put_current(k, &val)?;
|
||||||
},
|
},
|
||||||
_ => {
|
_ => {
|
||||||
@ -322,7 +331,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
self.autogenerate_docids = false;
|
self.autogenerate_docids = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<DocumentAdditionResult>
|
pub fn execute<R, F>(self, reader: R, progress_callback: F) -> Result<DocumentAdditionResult>
|
||||||
where
|
where
|
||||||
R: io::Read,
|
R: io::Read,
|
||||||
F: Fn(UpdateIndexingStep, u64) + Sync,
|
F: Fn(UpdateIndexingStep, u64) + Sync,
|
||||||
@ -369,7 +378,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
Ok(DocumentAdditionResult { nb_documents })
|
Ok(DocumentAdditionResult { nb_documents })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute_raw<F>(self, output: TransformOutput, progress_callback: F) -> anyhow::Result<()>
|
pub fn execute_raw<F>(self, output: TransformOutput, progress_callback: F) -> Result<()>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync
|
F: Fn(UpdateIndexingStep) + Sync
|
||||||
{
|
{
|
||||||
@ -407,15 +416,12 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
debug!("{} documents actually deleted", deleted_documents_count);
|
debug!("{} documents actually deleted", deleted_documents_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mmap;
|
if documents_count == 0 {
|
||||||
let bytes = if documents_count == 0 {
|
return Ok(());
|
||||||
&[][..]
|
}
|
||||||
} else {
|
|
||||||
mmap = unsafe { Mmap::map(&documents_file).context("mmaping the transform documents file")? };
|
|
||||||
&mmap
|
|
||||||
};
|
|
||||||
|
|
||||||
let documents = grenad::Reader::new(bytes).unwrap();
|
let bytes = unsafe { Mmap::map(&documents_file)? };
|
||||||
|
let documents = grenad::Reader::new(bytes.as_bytes()).unwrap();
|
||||||
|
|
||||||
// The enum which indicates the type of the readers
|
// The enum which indicates the type of the readers
|
||||||
// merges that are potentially done on different threads.
|
// merges that are potentially done on different threads.
|
||||||
@ -481,7 +487,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
&progress_callback,
|
&progress_callback,
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.collect::<Result<Vec<_>, _>>()?;
|
.collect::<StdResult<Vec<_>, _>>()?;
|
||||||
|
|
||||||
let mut main_readers = Vec::with_capacity(readers.len());
|
let mut main_readers = Vec::with_capacity(readers.len());
|
||||||
let mut word_docids_readers = Vec::with_capacity(readers.len());
|
let mut word_docids_readers = Vec::with_capacity(readers.len());
|
||||||
@ -539,22 +545,22 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
debug!("Merging the main, word docids and words pairs proximity docids in parallel...");
|
debug!("Merging the main, word docids and words pairs proximity docids in parallel...");
|
||||||
rayon::spawn(move || {
|
rayon::spawn(move || {
|
||||||
vec![
|
vec![
|
||||||
(DatabaseType::Main, main_readers, main_merge as MergeFn),
|
(DatabaseType::Main, main_readers, fst_merge as MergeFn<_>),
|
||||||
(DatabaseType::WordDocids, word_docids_readers, word_docids_merge),
|
(DatabaseType::WordDocids, word_docids_readers, roaring_bitmap_merge),
|
||||||
(
|
(
|
||||||
DatabaseType::FacetLevel0NumbersDocids,
|
DatabaseType::FacetLevel0NumbersDocids,
|
||||||
facet_field_numbers_docids_readers,
|
facet_field_numbers_docids_readers,
|
||||||
facet_field_value_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
DatabaseType::WordLevel0PositionDocids,
|
DatabaseType::WordLevel0PositionDocids,
|
||||||
word_level_position_docids_readers,
|
word_level_position_docids_readers,
|
||||||
word_level_position_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
DatabaseType::FieldIdWordCountDocids,
|
DatabaseType::FieldIdWordCountDocids,
|
||||||
field_id_word_count_docids_readers,
|
field_id_word_count_docids_readers,
|
||||||
field_id_word_count_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
.into_par_iter()
|
.into_par_iter()
|
||||||
@ -574,7 +580,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
facet_field_strings_docids_readers,
|
facet_field_strings_docids_readers,
|
||||||
field_id_docid_facet_numbers_readers,
|
field_id_docid_facet_numbers_readers,
|
||||||
field_id_docid_facet_strings_readers,
|
field_id_docid_facet_strings_readers,
|
||||||
)) as anyhow::Result<_>
|
)) as Result<_>
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
let (
|
let (
|
||||||
@ -622,12 +628,12 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
total_databases,
|
total_databases,
|
||||||
});
|
});
|
||||||
|
|
||||||
debug!("Writing the docid word positions into LMDB on disk...");
|
debug!("Inserting the docid word positions into LMDB on disk...");
|
||||||
merge_into_lmdb_database(
|
merge_into_lmdb_database(
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
*self.index.docid_word_positions.as_polymorph(),
|
*self.index.docid_word_positions.as_polymorph(),
|
||||||
docid_word_positions_readers,
|
docid_word_positions_readers,
|
||||||
docid_word_positions_merge,
|
keep_first,
|
||||||
write_method
|
write_method
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
@ -637,12 +643,12 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
total_databases,
|
total_databases,
|
||||||
});
|
});
|
||||||
|
|
||||||
debug!("Writing the documents into LMDB on disk...");
|
debug!("Inserting the documents into LMDB on disk...");
|
||||||
merge_into_lmdb_database(
|
merge_into_lmdb_database(
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
*self.index.documents.as_polymorph(),
|
*self.index.documents.as_polymorph(),
|
||||||
documents_readers,
|
documents_readers,
|
||||||
documents_merge,
|
keep_first,
|
||||||
write_method
|
write_method
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
@ -657,7 +663,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
self.wtxn,
|
self.wtxn,
|
||||||
*self.index.facet_id_string_docids.as_polymorph(),
|
*self.index.facet_id_string_docids.as_polymorph(),
|
||||||
facet_field_strings_docids_readers,
|
facet_field_strings_docids_readers,
|
||||||
facet_field_value_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
write_method,
|
write_method,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
@ -672,7 +678,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
self.wtxn,
|
self.wtxn,
|
||||||
*self.index.field_id_docid_facet_f64s.as_polymorph(),
|
*self.index.field_id_docid_facet_f64s.as_polymorph(),
|
||||||
field_id_docid_facet_numbers_readers,
|
field_id_docid_facet_numbers_readers,
|
||||||
field_id_docid_facet_values_merge,
|
keep_first,
|
||||||
write_method,
|
write_method,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
@ -687,7 +693,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
self.wtxn,
|
self.wtxn,
|
||||||
*self.index.field_id_docid_facet_strings.as_polymorph(),
|
*self.index.field_id_docid_facet_strings.as_polymorph(),
|
||||||
field_id_docid_facet_strings_readers,
|
field_id_docid_facet_strings_readers,
|
||||||
field_id_docid_facet_values_merge,
|
keep_first,
|
||||||
write_method,
|
write_method,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
@ -702,7 +708,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
self.wtxn,
|
self.wtxn,
|
||||||
*self.index.word_pair_proximity_docids.as_polymorph(),
|
*self.index.word_pair_proximity_docids.as_polymorph(),
|
||||||
words_pairs_proximities_docids_readers,
|
words_pairs_proximities_docids_readers,
|
||||||
words_pairs_proximities_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
write_method,
|
write_method,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
@ -721,7 +727,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
self.wtxn,
|
self.wtxn,
|
||||||
self.index.main,
|
self.index.main,
|
||||||
content,
|
content,
|
||||||
main_merge,
|
fst_merge,
|
||||||
WriteMethod::GetMergePut,
|
WriteMethod::GetMergePut,
|
||||||
)?;
|
)?;
|
||||||
},
|
},
|
||||||
@ -732,7 +738,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
self.wtxn,
|
self.wtxn,
|
||||||
db,
|
db,
|
||||||
content,
|
content,
|
||||||
word_docids_merge,
|
roaring_bitmap_merge,
|
||||||
write_method,
|
write_method,
|
||||||
)?;
|
)?;
|
||||||
},
|
},
|
||||||
@ -743,7 +749,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
self.wtxn,
|
self.wtxn,
|
||||||
db,
|
db,
|
||||||
content,
|
content,
|
||||||
facet_field_value_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
write_method,
|
write_method,
|
||||||
)?;
|
)?;
|
||||||
},
|
},
|
||||||
@ -754,7 +760,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
self.wtxn,
|
self.wtxn,
|
||||||
db,
|
db,
|
||||||
content,
|
content,
|
||||||
field_id_word_count_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
write_method,
|
write_method,
|
||||||
)?;
|
)?;
|
||||||
},
|
},
|
||||||
@ -765,7 +771,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
self.wtxn,
|
self.wtxn,
|
||||||
db,
|
db,
|
||||||
content,
|
content,
|
||||||
word_level_position_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
write_method,
|
write_method,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,6 @@ use std::iter::FromIterator;
|
|||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
use std::{cmp, iter};
|
use std::{cmp, iter};
|
||||||
|
|
||||||
use anyhow::Context;
|
|
||||||
use bstr::ByteSlice as _;
|
use bstr::ByteSlice as _;
|
||||||
use fst::Set;
|
use fst::Set;
|
||||||
use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType};
|
use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType};
|
||||||
@ -19,24 +18,21 @@ use roaring::RoaringBitmap;
|
|||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use tempfile::tempfile;
|
use tempfile::tempfile;
|
||||||
|
|
||||||
|
use crate::error::{Error, InternalError, SerializationError};
|
||||||
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
|
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
|
||||||
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec};
|
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec};
|
||||||
use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
||||||
use crate::update::UpdateIndexingStep;
|
use crate::update::UpdateIndexingStep;
|
||||||
use crate::{json_to_string, SmallVec32, Position, DocumentId, FieldId};
|
use crate::{json_to_string, SmallVec32, Position, DocumentId, FieldId, Result};
|
||||||
|
|
||||||
use super::{MergeFn, create_writer, create_sorter, writer_into_reader};
|
use super::{MergeFn, create_writer, create_sorter, writer_into_reader};
|
||||||
use super::merge_function::{
|
use super::merge_function::{fst_merge, keep_first, roaring_bitmap_merge, cbo_roaring_bitmap_merge};
|
||||||
main_merge, word_docids_merge, words_pairs_proximities_docids_merge,
|
|
||||||
word_level_position_docids_merge, facet_field_value_docids_merge,
|
|
||||||
field_id_docid_facet_values_merge, field_id_word_count_docids_merge,
|
|
||||||
};
|
|
||||||
|
|
||||||
const LMDB_MAX_KEY_LENGTH: usize = 511;
|
const LMDB_MAX_KEY_LENGTH: usize = 511;
|
||||||
const ONE_KILOBYTE: usize = 1024 * 1024;
|
const ONE_KILOBYTE: usize = 1024 * 1024;
|
||||||
|
|
||||||
const MAX_POSITION: usize = 1000;
|
const MAX_POSITION: usize = 1000;
|
||||||
const WORDS_FST_KEY: &[u8] = crate::index::WORDS_FST_KEY.as_bytes();
|
const WORDS_FST_KEY: &[u8] = crate::index::main_key::WORDS_FST_KEY.as_bytes();
|
||||||
|
|
||||||
pub struct Readers {
|
pub struct Readers {
|
||||||
pub main: Reader<FileFuse>,
|
pub main: Reader<FileFuse>,
|
||||||
@ -70,15 +66,15 @@ pub struct Store<'s, A> {
|
|||||||
chunk_compression_level: Option<u32>,
|
chunk_compression_level: Option<u32>,
|
||||||
chunk_fusing_shrink_size: Option<u64>,
|
chunk_fusing_shrink_size: Option<u64>,
|
||||||
// MTBL sorters
|
// MTBL sorters
|
||||||
main_sorter: Sorter<MergeFn>,
|
main_sorter: Sorter<MergeFn<Error>>,
|
||||||
word_docids_sorter: Sorter<MergeFn>,
|
word_docids_sorter: Sorter<MergeFn<Error>>,
|
||||||
words_pairs_proximities_docids_sorter: Sorter<MergeFn>,
|
words_pairs_proximities_docids_sorter: Sorter<MergeFn<Error>>,
|
||||||
word_level_position_docids_sorter: Sorter<MergeFn>,
|
word_level_position_docids_sorter: Sorter<MergeFn<Error>>,
|
||||||
field_id_word_count_docids_sorter: Sorter<MergeFn>,
|
field_id_word_count_docids_sorter: Sorter<MergeFn<Error>>,
|
||||||
facet_field_numbers_docids_sorter: Sorter<MergeFn>,
|
facet_field_numbers_docids_sorter: Sorter<MergeFn<Error>>,
|
||||||
facet_field_strings_docids_sorter: Sorter<MergeFn>,
|
facet_field_strings_docids_sorter: Sorter<MergeFn<Error>>,
|
||||||
field_id_docid_facet_numbers_sorter: Sorter<MergeFn>,
|
field_id_docid_facet_numbers_sorter: Sorter<MergeFn<Error>>,
|
||||||
field_id_docid_facet_strings_sorter: Sorter<MergeFn>,
|
field_id_docid_facet_strings_sorter: Sorter<MergeFn<Error>>,
|
||||||
// MTBL writers
|
// MTBL writers
|
||||||
docid_word_positions_writer: Writer<File>,
|
docid_word_positions_writer: Writer<File>,
|
||||||
documents_writer: Writer<File>,
|
documents_writer: Writer<File>,
|
||||||
@ -97,14 +93,14 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
chunk_compression_level: Option<u32>,
|
chunk_compression_level: Option<u32>,
|
||||||
chunk_fusing_shrink_size: Option<u64>,
|
chunk_fusing_shrink_size: Option<u64>,
|
||||||
stop_words: Option<&'s Set<A>>,
|
stop_words: Option<&'s Set<A>>,
|
||||||
) -> anyhow::Result<Self>
|
) -> Result<Self>
|
||||||
{
|
{
|
||||||
// We divide the max memory by the number of sorter the Store have.
|
// We divide the max memory by the number of sorter the Store have.
|
||||||
let max_memory = max_memory.map(|mm| cmp::max(ONE_KILOBYTE, mm / 5));
|
let max_memory = max_memory.map(|mm| cmp::max(ONE_KILOBYTE, mm / 5));
|
||||||
let linked_hash_map_size = linked_hash_map_size.unwrap_or(500);
|
let linked_hash_map_size = linked_hash_map_size.unwrap_or(500);
|
||||||
|
|
||||||
let main_sorter = create_sorter(
|
let main_sorter = create_sorter(
|
||||||
main_merge,
|
fst_merge,
|
||||||
chunk_compression_type,
|
chunk_compression_type,
|
||||||
chunk_compression_level,
|
chunk_compression_level,
|
||||||
chunk_fusing_shrink_size,
|
chunk_fusing_shrink_size,
|
||||||
@ -112,7 +108,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
max_memory,
|
max_memory,
|
||||||
);
|
);
|
||||||
let word_docids_sorter = create_sorter(
|
let word_docids_sorter = create_sorter(
|
||||||
word_docids_merge,
|
roaring_bitmap_merge,
|
||||||
chunk_compression_type,
|
chunk_compression_type,
|
||||||
chunk_compression_level,
|
chunk_compression_level,
|
||||||
chunk_fusing_shrink_size,
|
chunk_fusing_shrink_size,
|
||||||
@ -120,7 +116,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
max_memory,
|
max_memory,
|
||||||
);
|
);
|
||||||
let words_pairs_proximities_docids_sorter = create_sorter(
|
let words_pairs_proximities_docids_sorter = create_sorter(
|
||||||
words_pairs_proximities_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
chunk_compression_type,
|
chunk_compression_type,
|
||||||
chunk_compression_level,
|
chunk_compression_level,
|
||||||
chunk_fusing_shrink_size,
|
chunk_fusing_shrink_size,
|
||||||
@ -128,7 +124,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
max_memory,
|
max_memory,
|
||||||
);
|
);
|
||||||
let word_level_position_docids_sorter = create_sorter(
|
let word_level_position_docids_sorter = create_sorter(
|
||||||
word_level_position_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
chunk_compression_type,
|
chunk_compression_type,
|
||||||
chunk_compression_level,
|
chunk_compression_level,
|
||||||
chunk_fusing_shrink_size,
|
chunk_fusing_shrink_size,
|
||||||
@ -136,7 +132,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
max_memory,
|
max_memory,
|
||||||
);
|
);
|
||||||
let field_id_word_count_docids_sorter = create_sorter(
|
let field_id_word_count_docids_sorter = create_sorter(
|
||||||
field_id_word_count_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
chunk_compression_type,
|
chunk_compression_type,
|
||||||
chunk_compression_level,
|
chunk_compression_level,
|
||||||
chunk_fusing_shrink_size,
|
chunk_fusing_shrink_size,
|
||||||
@ -144,7 +140,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
max_memory,
|
max_memory,
|
||||||
);
|
);
|
||||||
let facet_field_numbers_docids_sorter = create_sorter(
|
let facet_field_numbers_docids_sorter = create_sorter(
|
||||||
facet_field_value_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
chunk_compression_type,
|
chunk_compression_type,
|
||||||
chunk_compression_level,
|
chunk_compression_level,
|
||||||
chunk_fusing_shrink_size,
|
chunk_fusing_shrink_size,
|
||||||
@ -152,7 +148,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
max_memory,
|
max_memory,
|
||||||
);
|
);
|
||||||
let facet_field_strings_docids_sorter = create_sorter(
|
let facet_field_strings_docids_sorter = create_sorter(
|
||||||
facet_field_value_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
chunk_compression_type,
|
chunk_compression_type,
|
||||||
chunk_compression_level,
|
chunk_compression_level,
|
||||||
chunk_fusing_shrink_size,
|
chunk_fusing_shrink_size,
|
||||||
@ -160,7 +156,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
max_memory,
|
max_memory,
|
||||||
);
|
);
|
||||||
let field_id_docid_facet_numbers_sorter = create_sorter(
|
let field_id_docid_facet_numbers_sorter = create_sorter(
|
||||||
field_id_docid_facet_values_merge,
|
keep_first,
|
||||||
chunk_compression_type,
|
chunk_compression_type,
|
||||||
chunk_compression_level,
|
chunk_compression_level,
|
||||||
chunk_fusing_shrink_size,
|
chunk_fusing_shrink_size,
|
||||||
@ -168,7 +164,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
Some(1024 * 1024 * 1024), // 1MB
|
Some(1024 * 1024 * 1024), // 1MB
|
||||||
);
|
);
|
||||||
let field_id_docid_facet_strings_sorter = create_sorter(
|
let field_id_docid_facet_strings_sorter = create_sorter(
|
||||||
field_id_docid_facet_values_merge,
|
keep_first,
|
||||||
chunk_compression_type,
|
chunk_compression_type,
|
||||||
chunk_compression_level,
|
chunk_compression_level,
|
||||||
chunk_fusing_shrink_size,
|
chunk_fusing_shrink_size,
|
||||||
@ -225,7 +221,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Save the documents ids under the position and word we have seen it.
|
// Save the documents ids under the position and word we have seen it.
|
||||||
fn insert_word_docid(&mut self, word: &str, id: DocumentId) -> anyhow::Result<()> {
|
fn insert_word_docid(&mut self, word: &str, id: DocumentId) -> Result<()> {
|
||||||
// if get_refresh finds the element it is assured to be at the end of the linked hash map.
|
// if get_refresh finds the element it is assured to be at the end of the linked hash map.
|
||||||
match self.word_docids.get_refresh(word.as_bytes()) {
|
match self.word_docids.get_refresh(word.as_bytes()) {
|
||||||
Some(old) => { old.insert(id); },
|
Some(old) => { old.insert(id); },
|
||||||
@ -250,7 +246,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
value: OrderedFloat<f64>,
|
value: OrderedFloat<f64>,
|
||||||
id: DocumentId,
|
id: DocumentId,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
{
|
{
|
||||||
let sorter = &mut self.field_id_docid_facet_numbers_sorter;
|
let sorter = &mut self.field_id_docid_facet_numbers_sorter;
|
||||||
Self::write_field_id_docid_facet_number_value(sorter, field_id, id, value)?;
|
Self::write_field_id_docid_facet_number_value(sorter, field_id, id, value)?;
|
||||||
@ -283,7 +279,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
value: String,
|
value: String,
|
||||||
id: DocumentId,
|
id: DocumentId,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
{
|
{
|
||||||
let sorter = &mut self.field_id_docid_facet_strings_sorter;
|
let sorter = &mut self.field_id_docid_facet_strings_sorter;
|
||||||
Self::write_field_id_docid_facet_string_value(sorter, field_id, id, &value)?;
|
Self::write_field_id_docid_facet_string_value(sorter, field_id, id, &value)?;
|
||||||
@ -315,7 +311,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
words_pairs_proximities: impl IntoIterator<Item=((&'a str, &'a str), u8)>,
|
words_pairs_proximities: impl IntoIterator<Item=((&'a str, &'a str), u8)>,
|
||||||
id: DocumentId,
|
id: DocumentId,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
{
|
{
|
||||||
for ((w1, w2), prox) in words_pairs_proximities {
|
for ((w1, w2), prox) in words_pairs_proximities {
|
||||||
let w1 = SmallVec32::from(w1.as_bytes());
|
let w1 = SmallVec32::from(w1.as_bytes());
|
||||||
@ -354,7 +350,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
facet_numbers_values: &mut HashMap<FieldId, Vec<f64>>,
|
facet_numbers_values: &mut HashMap<FieldId, Vec<f64>>,
|
||||||
facet_strings_values: &mut HashMap<FieldId, Vec<String>>,
|
facet_strings_values: &mut HashMap<FieldId, Vec<String>>,
|
||||||
record: &[u8],
|
record: &[u8],
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
{
|
{
|
||||||
// We compute the list of words pairs proximities (self-join) and write it directly to disk.
|
// We compute the list of words pairs proximities (self-join) and write it directly to disk.
|
||||||
let words_pair_proximities = compute_words_pair_proximities(&words_positions);
|
let words_pair_proximities = compute_words_pair_proximities(&words_positions);
|
||||||
@ -389,10 +385,12 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_words_pairs_proximities(
|
fn write_words_pairs_proximities<E>(
|
||||||
sorter: &mut Sorter<MergeFn>,
|
sorter: &mut Sorter<MergeFn<E>>,
|
||||||
iter: impl IntoIterator<Item=((SmallVec32<u8>, SmallVec32<u8>, u8), RoaringBitmap)>,
|
iter: impl IntoIterator<Item=((SmallVec32<u8>, SmallVec32<u8>, u8), RoaringBitmap)>,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
|
where
|
||||||
|
Error: From<E>,
|
||||||
{
|
{
|
||||||
let mut key = Vec::new();
|
let mut key = Vec::new();
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
@ -407,7 +405,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
// We serialize the document ids into a buffer
|
// We serialize the document ids into a buffer
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
buffer.reserve(CboRoaringBitmapCodec::serialized_size(&docids));
|
buffer.reserve(CboRoaringBitmapCodec::serialized_size(&docids));
|
||||||
CboRoaringBitmapCodec::serialize_into(&docids, &mut buffer)?;
|
CboRoaringBitmapCodec::serialize_into(&docids, &mut buffer);
|
||||||
// that we write under the generated key into MTBL
|
// that we write under the generated key into MTBL
|
||||||
if lmdb_key_valid_size(&key) {
|
if lmdb_key_valid_size(&key) {
|
||||||
sorter.insert(&key, &buffer)?;
|
sorter.insert(&key, &buffer)?;
|
||||||
@ -421,10 +419,11 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
writer: &mut Writer<File>,
|
writer: &mut Writer<File>,
|
||||||
id: DocumentId,
|
id: DocumentId,
|
||||||
words_positions: &HashMap<String, SmallVec32<Position>>,
|
words_positions: &HashMap<String, SmallVec32<Position>>,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
{
|
{
|
||||||
// We prefix the words by the document id.
|
// We prefix the words by the document id.
|
||||||
let mut key = id.to_be_bytes().to_vec();
|
let mut key = id.to_be_bytes().to_vec();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
let base_size = key.len();
|
let base_size = key.len();
|
||||||
|
|
||||||
// We order the words lexicographically, this way we avoid passing by a sorter.
|
// We order the words lexicographically, this way we avoid passing by a sorter.
|
||||||
@ -433,24 +432,28 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
for (word, positions) in words_positions {
|
for (word, positions) in words_positions {
|
||||||
key.truncate(base_size);
|
key.truncate(base_size);
|
||||||
key.extend_from_slice(word.as_bytes());
|
key.extend_from_slice(word.as_bytes());
|
||||||
|
buffer.clear();
|
||||||
|
|
||||||
// We serialize the positions into a buffer.
|
// We serialize the positions into a buffer.
|
||||||
let positions = RoaringBitmap::from_iter(positions.iter().cloned());
|
let positions = RoaringBitmap::from_iter(positions.iter().cloned());
|
||||||
let bytes = BoRoaringBitmapCodec::bytes_encode(&positions)
|
BoRoaringBitmapCodec::serialize_into(&positions, &mut buffer);
|
||||||
.with_context(|| "could not serialize positions")?;
|
|
||||||
// that we write under the generated key into MTBL
|
// that we write under the generated key into MTBL
|
||||||
if lmdb_key_valid_size(&key) {
|
if lmdb_key_valid_size(&key) {
|
||||||
writer.insert(&key, &bytes)?;
|
writer.insert(&key, &buffer)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_word_position_docids(
|
fn write_word_position_docids<E>(
|
||||||
writer: &mut Sorter<MergeFn>,
|
writer: &mut Sorter<MergeFn<E>>,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
words_positions: &HashMap<String, SmallVec32<Position>>,
|
words_positions: &HashMap<String, SmallVec32<Position>>,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
|
where
|
||||||
|
Error: From<E>,
|
||||||
{
|
{
|
||||||
let mut key_buffer = Vec::new();
|
let mut key_buffer = Vec::new();
|
||||||
let mut data_buffer = Vec::new();
|
let mut data_buffer = Vec::new();
|
||||||
@ -469,8 +472,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
data_buffer.clear();
|
data_buffer.clear();
|
||||||
let positions = RoaringBitmap::from_iter(Some(document_id));
|
let positions = RoaringBitmap::from_iter(Some(document_id));
|
||||||
// We serialize the positions into a buffer.
|
// We serialize the positions into a buffer.
|
||||||
CboRoaringBitmapCodec::serialize_into(&positions, &mut data_buffer)
|
CboRoaringBitmapCodec::serialize_into(&positions, &mut data_buffer);
|
||||||
.with_context(|| "could not serialize positions")?;
|
|
||||||
|
|
||||||
// that we write under the generated key into MTBL
|
// that we write under the generated key into MTBL
|
||||||
if lmdb_key_valid_size(&key_buffer) {
|
if lmdb_key_valid_size(&key_buffer) {
|
||||||
@ -482,56 +484,71 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_facet_field_string_docids<I>(
|
fn write_facet_field_string_docids<I, E>(
|
||||||
sorter: &mut Sorter<MergeFn>,
|
sorter: &mut Sorter<MergeFn<E>>,
|
||||||
iter: I,
|
iter: I,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
where I: IntoIterator<Item=((FieldId, String), RoaringBitmap)>
|
where
|
||||||
|
I: IntoIterator<Item=((FieldId, String), RoaringBitmap)>,
|
||||||
|
Error: From<E>,
|
||||||
{
|
{
|
||||||
|
let mut key_buffer = Vec::new();
|
||||||
|
let mut data_buffer = Vec::new();
|
||||||
|
|
||||||
for ((field_id, value), docids) in iter {
|
for ((field_id, value), docids) in iter {
|
||||||
let key = FacetValueStringCodec::bytes_encode(&(field_id, &value))
|
key_buffer.clear();
|
||||||
.map(Cow::into_owned)
|
data_buffer.clear();
|
||||||
.context("could not serialize facet key")?;
|
|
||||||
let bytes = CboRoaringBitmapCodec::bytes_encode(&docids)
|
FacetValueStringCodec::serialize_into(field_id, &value, &mut key_buffer);
|
||||||
.context("could not serialize docids")?;
|
CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer);
|
||||||
if lmdb_key_valid_size(&key) {
|
|
||||||
sorter.insert(&key, &bytes)?;
|
if lmdb_key_valid_size(&key_buffer) {
|
||||||
|
sorter.insert(&key_buffer, &data_buffer)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_facet_field_number_docids<I>(
|
fn write_facet_field_number_docids<I, E>(
|
||||||
sorter: &mut Sorter<MergeFn>,
|
sorter: &mut Sorter<MergeFn<E>>,
|
||||||
iter: I,
|
iter: I,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
where I: IntoIterator<Item=((FieldId, OrderedFloat<f64>), RoaringBitmap)>
|
where
|
||||||
|
I: IntoIterator<Item=((FieldId, OrderedFloat<f64>), RoaringBitmap)>,
|
||||||
|
Error: From<E>,
|
||||||
{
|
{
|
||||||
|
let mut data_buffer = Vec::new();
|
||||||
|
|
||||||
for ((field_id, value), docids) in iter {
|
for ((field_id, value), docids) in iter {
|
||||||
|
data_buffer.clear();
|
||||||
|
|
||||||
let key = FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *value, *value))
|
let key = FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *value, *value))
|
||||||
.map(Cow::into_owned)
|
.map(Cow::into_owned)
|
||||||
.context("could not serialize facet key")?;
|
.ok_or(SerializationError::Encoding { db_name: Some("facet level value") })?;
|
||||||
let bytes = CboRoaringBitmapCodec::bytes_encode(&docids)
|
|
||||||
.context("could not serialize docids")?;
|
CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer);
|
||||||
|
|
||||||
if lmdb_key_valid_size(&key) {
|
if lmdb_key_valid_size(&key) {
|
||||||
sorter.insert(&key, &bytes)?;
|
sorter.insert(&key, &data_buffer)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_field_id_docid_facet_number_value(
|
fn write_field_id_docid_facet_number_value<E>(
|
||||||
sorter: &mut Sorter<MergeFn>,
|
sorter: &mut Sorter<MergeFn<E>>,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
value: OrderedFloat<f64>,
|
value: OrderedFloat<f64>,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
|
where
|
||||||
|
Error: From<E>,
|
||||||
{
|
{
|
||||||
let key = FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, *value))
|
let key = FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, *value))
|
||||||
.map(Cow::into_owned)
|
.map(Cow::into_owned)
|
||||||
.context("could not serialize facet key")?;
|
.ok_or(SerializationError::Encoding { db_name: Some("facet level value") })?;
|
||||||
|
|
||||||
if lmdb_key_valid_size(&key) {
|
if lmdb_key_valid_size(&key) {
|
||||||
sorter.insert(&key, &[])?;
|
sorter.insert(&key, &[])?;
|
||||||
@ -540,26 +557,30 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_field_id_docid_facet_string_value(
|
fn write_field_id_docid_facet_string_value<E>(
|
||||||
sorter: &mut Sorter<MergeFn>,
|
sorter: &mut Sorter<MergeFn<E>>,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
value: &str,
|
value: &str,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
|
where
|
||||||
|
Error: From<E>,
|
||||||
{
|
{
|
||||||
let key = FieldDocIdFacetStringCodec::bytes_encode(&(field_id, document_id, value))
|
let mut buffer = Vec::new();
|
||||||
.map(Cow::into_owned)
|
|
||||||
.context("could not serialize facet key")?;
|
|
||||||
|
|
||||||
if lmdb_key_valid_size(&key) {
|
FieldDocIdFacetStringCodec::serialize_into(field_id, document_id, value, &mut buffer);
|
||||||
sorter.insert(&key, &[])?;
|
|
||||||
|
if lmdb_key_valid_size(&buffer) {
|
||||||
|
sorter.insert(&buffer, &[])?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_word_docids<I>(sorter: &mut Sorter<MergeFn>, iter: I) -> anyhow::Result<()>
|
fn write_word_docids<I, E>(sorter: &mut Sorter<MergeFn<E>>, iter: I) -> Result<()>
|
||||||
where I: IntoIterator<Item=(SmallVec32<u8>, RoaringBitmap)>
|
where
|
||||||
|
I: IntoIterator<Item=(SmallVec32<u8>, RoaringBitmap)>,
|
||||||
|
Error: From<E>,
|
||||||
{
|
{
|
||||||
let mut key = Vec::new();
|
let mut key = Vec::new();
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
@ -589,7 +610,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
num_threads: usize,
|
num_threads: usize,
|
||||||
log_every_n: Option<usize>,
|
log_every_n: Option<usize>,
|
||||||
mut progress_callback: F,
|
mut progress_callback: F,
|
||||||
) -> anyhow::Result<Readers>
|
) -> Result<Readers>
|
||||||
where F: FnMut(UpdateIndexingStep),
|
where F: FnMut(UpdateIndexingStep),
|
||||||
{
|
{
|
||||||
debug!("{:?}: Indexing in a Store...", thread_index);
|
debug!("{:?}: Indexing in a Store...", thread_index);
|
||||||
@ -618,7 +639,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
|
|
||||||
for (attr, content) in document.iter() {
|
for (attr, content) in document.iter() {
|
||||||
if self.faceted_fields.contains(&attr) || self.searchable_fields.contains(&attr) {
|
if self.faceted_fields.contains(&attr) || self.searchable_fields.contains(&attr) {
|
||||||
let value = serde_json::from_slice(content)?;
|
let value = serde_json::from_slice(content).map_err(InternalError::SerdeJson)?;
|
||||||
|
|
||||||
let (facet_numbers, facet_strings) = extract_facet_values(&value);
|
let (facet_numbers, facet_strings) = extract_facet_values(&value);
|
||||||
facet_numbers_values.entry(attr).or_insert_with(Vec::new).extend(facet_numbers);
|
facet_numbers_values.entry(attr).or_insert_with(Vec::new).extend(facet_numbers);
|
||||||
@ -672,7 +693,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
Ok(readers)
|
Ok(readers)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn finish(mut self) -> anyhow::Result<Readers> {
|
fn finish(mut self) -> Result<Readers> {
|
||||||
let comp_type = self.chunk_compression_type;
|
let comp_type = self.chunk_compression_type;
|
||||||
let comp_level = self.chunk_compression_level;
|
let comp_level = self.chunk_compression_level;
|
||||||
let shrink_size = self.chunk_fusing_shrink_size;
|
let shrink_size = self.chunk_fusing_shrink_size;
|
||||||
@ -706,7 +727,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
let mut docids_buffer = Vec::new();
|
let mut docids_buffer = Vec::new();
|
||||||
for ((fid, count), docids) in self.field_id_word_count_docids {
|
for ((fid, count), docids) in self.field_id_word_count_docids {
|
||||||
docids_buffer.clear();
|
docids_buffer.clear();
|
||||||
CboRoaringBitmapCodec::serialize_into(&docids, &mut docids_buffer)?;
|
CboRoaringBitmapCodec::serialize_into(&docids, &mut docids_buffer);
|
||||||
self.field_id_word_count_docids_sorter.insert([fid, count], &docids_buffer)?;
|
self.field_id_word_count_docids_sorter.insert([fid, count], &docids_buffer)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,16 +2,20 @@ use std::borrow::Cow;
|
|||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{Read, Seek, SeekFrom};
|
use std::io::{Read, Seek, SeekFrom};
|
||||||
use std::iter::Peekable;
|
use std::iter::Peekable;
|
||||||
|
use std::result::Result as StdResult;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use anyhow::{anyhow, Context};
|
|
||||||
use grenad::CompressionType;
|
use grenad::CompressionType;
|
||||||
use log::info;
|
use log::info;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde_json::{Map, Value};
|
use serde_json::{Map, Value};
|
||||||
|
|
||||||
use crate::{Index, BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution};
|
use crate::error::{Error, UserError, InternalError};
|
||||||
|
use crate::index::db_name;
|
||||||
|
use crate::update::index_documents::merge_function::{merge_obkvs, keep_latest_obkv};
|
||||||
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
|
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
|
||||||
|
use crate::{BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution};
|
||||||
|
use crate::{Index, Result};
|
||||||
use super::merge_function::merge_two_obkvs;
|
use super::merge_function::merge_two_obkvs;
|
||||||
use super::{create_writer, create_sorter, IndexDocumentsMethod};
|
use super::{create_writer, create_sorter, IndexDocumentsMethod};
|
||||||
|
|
||||||
@ -52,7 +56,7 @@ fn is_primary_key(field: impl AsRef<str>) -> bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Transform<'_, '_> {
|
impl Transform<'_, '_> {
|
||||||
pub fn output_from_json<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<TransformOutput>
|
pub fn output_from_json<R, F>(self, reader: R, progress_callback: F) -> Result<TransformOutput>
|
||||||
where
|
where
|
||||||
R: Read,
|
R: Read,
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
@ -60,7 +64,7 @@ impl Transform<'_, '_> {
|
|||||||
self.output_from_generic_json(reader, false, progress_callback)
|
self.output_from_generic_json(reader, false, progress_callback)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn output_from_json_stream<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<TransformOutput>
|
pub fn output_from_json_stream<R, F>(self, reader: R, progress_callback: F) -> Result<TransformOutput>
|
||||||
where
|
where
|
||||||
R: Read,
|
R: Read,
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
@ -73,7 +77,7 @@ impl Transform<'_, '_> {
|
|||||||
reader: R,
|
reader: R,
|
||||||
is_stream: bool,
|
is_stream: bool,
|
||||||
progress_callback: F,
|
progress_callback: F,
|
||||||
) -> anyhow::Result<TransformOutput>
|
) -> Result<TransformOutput>
|
||||||
where
|
where
|
||||||
R: Read,
|
R: Read,
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
@ -87,7 +91,7 @@ impl Transform<'_, '_> {
|
|||||||
let iter = Box::new(iter) as Box<dyn Iterator<Item=_>>;
|
let iter = Box::new(iter) as Box<dyn Iterator<Item=_>>;
|
||||||
iter.peekable()
|
iter.peekable()
|
||||||
} else {
|
} else {
|
||||||
let vec: Vec<_> = serde_json::from_reader(reader)?;
|
let vec: Vec<_> = serde_json::from_reader(reader).map_err(UserError::SerdeJson)?;
|
||||||
let iter = vec.into_iter().map(Ok);
|
let iter = vec.into_iter().map(Ok);
|
||||||
let iter = Box::new(iter) as Box<dyn Iterator<Item=_>>;
|
let iter = Box::new(iter) as Box<dyn Iterator<Item=_>>;
|
||||||
iter.peekable()
|
iter.peekable()
|
||||||
@ -95,9 +99,12 @@ impl Transform<'_, '_> {
|
|||||||
|
|
||||||
// We extract the primary key from the first document in
|
// We extract the primary key from the first document in
|
||||||
// the batch if it hasn't already been defined in the index
|
// the batch if it hasn't already been defined in the index
|
||||||
let first = match documents.peek().map(Result::as_ref).transpose() {
|
let first = match documents.peek().map(StdResult::as_ref).transpose() {
|
||||||
Ok(first) => first,
|
Ok(first) => first,
|
||||||
Err(_) => return Err(documents.next().unwrap().unwrap_err().into()),
|
Err(_) => {
|
||||||
|
let error = documents.next().unwrap().unwrap_err();
|
||||||
|
return Err(UserError::SerdeJson(error).into());
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
let alternative_name = first.and_then(|doc| doc.keys().find(|f| is_primary_key(f)).cloned());
|
let alternative_name = first.and_then(|doc| doc.keys().find(|f| is_primary_key(f)).cloned());
|
||||||
@ -144,7 +151,7 @@ impl Transform<'_, '_> {
|
|||||||
let mut documents_count = 0;
|
let mut documents_count = 0;
|
||||||
|
|
||||||
for result in documents {
|
for result in documents {
|
||||||
let document = result?;
|
let document = result.map_err(UserError::SerdeJson)?;
|
||||||
|
|
||||||
if self.log_every_n.map_or(false, |len| documents_count % len == 0) {
|
if self.log_every_n.map_or(false, |len| documents_count % len == 0) {
|
||||||
progress_callback(UpdateIndexingStep::TransformFromUserIntoGenericFormat {
|
progress_callback(UpdateIndexingStep::TransformFromUserIntoGenericFormat {
|
||||||
@ -157,7 +164,7 @@ impl Transform<'_, '_> {
|
|||||||
|
|
||||||
// We prepare the fields ids map with the documents keys.
|
// We prepare the fields ids map with the documents keys.
|
||||||
for (key, _value) in &document {
|
for (key, _value) in &document {
|
||||||
fields_ids_map.insert(&key).context("field id limit reached")?;
|
fields_ids_map.insert(&key).ok_or(UserError::AttributeLimitReached)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We retrieve the user id from the document based on the primary key name,
|
// We retrieve the user id from the document based on the primary key name,
|
||||||
@ -166,11 +173,13 @@ impl Transform<'_, '_> {
|
|||||||
Some(value) => match value {
|
Some(value) => match value {
|
||||||
Value::String(string) => Cow::Borrowed(string.as_str()),
|
Value::String(string) => Cow::Borrowed(string.as_str()),
|
||||||
Value::Number(number) => Cow::Owned(number.to_string()),
|
Value::Number(number) => Cow::Owned(number.to_string()),
|
||||||
_ => return Err(anyhow!("documents ids must be either strings or numbers")),
|
content => return Err(UserError::InvalidDocumentId {
|
||||||
|
document_id: content.clone(),
|
||||||
|
}.into()),
|
||||||
},
|
},
|
||||||
None => {
|
None => {
|
||||||
if !self.autogenerate_docids {
|
if !self.autogenerate_docids {
|
||||||
return Err(anyhow!("missing primary key"));
|
return Err(UserError::MissingPrimaryKey.into());
|
||||||
}
|
}
|
||||||
let uuid = uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer);
|
let uuid = uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer);
|
||||||
Cow::Borrowed(uuid)
|
Cow::Borrowed(uuid)
|
||||||
@ -185,13 +194,15 @@ impl Transform<'_, '_> {
|
|||||||
// and this should be the document id we return the one we generated.
|
// and this should be the document id we return the one we generated.
|
||||||
if let Some(value) = document.get(name) {
|
if let Some(value) = document.get(name) {
|
||||||
// We serialize the attribute values.
|
// We serialize the attribute values.
|
||||||
serde_json::to_writer(&mut json_buffer, value)?;
|
serde_json::to_writer(&mut json_buffer, value).map_err(InternalError::SerdeJson)?;
|
||||||
writer.insert(field_id, &json_buffer)?;
|
writer.insert(field_id, &json_buffer)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We validate the document id [a-zA-Z0-9\-_].
|
// We validate the document id [a-zA-Z0-9\-_].
|
||||||
if field_id == primary_key_id && validate_document_id(&external_id).is_none() {
|
if field_id == primary_key_id && validate_document_id(&external_id).is_none() {
|
||||||
return Err(anyhow!("invalid document id: {:?}", external_id));
|
return Err(UserError::InvalidDocumentId {
|
||||||
|
document_id: Value::from(external_id),
|
||||||
|
}.into());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -216,7 +227,7 @@ impl Transform<'_, '_> {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn output_from_csv<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<TransformOutput>
|
pub fn output_from_csv<R, F>(self, reader: R, progress_callback: F) -> Result<TransformOutput>
|
||||||
where
|
where
|
||||||
R: Read,
|
R: Read,
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
@ -225,12 +236,12 @@ impl Transform<'_, '_> {
|
|||||||
let external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap();
|
let external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap();
|
||||||
|
|
||||||
let mut csv = csv::Reader::from_reader(reader);
|
let mut csv = csv::Reader::from_reader(reader);
|
||||||
let headers = csv.headers()?;
|
let headers = csv.headers().map_err(UserError::Csv)?;
|
||||||
|
|
||||||
let mut fields_ids = Vec::new();
|
let mut fields_ids = Vec::new();
|
||||||
// Generate the new fields ids based on the current fields ids and this CSV headers.
|
// Generate the new fields ids based on the current fields ids and this CSV headers.
|
||||||
for (i, header) in headers.iter().enumerate() {
|
for (i, header) in headers.iter().enumerate() {
|
||||||
let id = fields_ids_map.insert(header).context("field id limit reached)")?;
|
let id = fields_ids_map.insert(header).ok_or(UserError::AttributeLimitReached)?;
|
||||||
fields_ids.push((id, i));
|
fields_ids.push((id, i));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -246,7 +257,7 @@ impl Transform<'_, '_> {
|
|||||||
// Returns the field id in the fields ids map, create an "id" field
|
// Returns the field id in the fields ids map, create an "id" field
|
||||||
// in case it is not in the current headers.
|
// in case it is not in the current headers.
|
||||||
let alternative_name = primary_key_pos.map(|pos| headers[pos].to_string());
|
let alternative_name = primary_key_pos.map(|pos| headers[pos].to_string());
|
||||||
let (primary_key_id, _) = compute_primary_key_pair(
|
let (primary_key_id, primary_key_name) = compute_primary_key_pair(
|
||||||
self.index.primary_key(self.rtxn)?,
|
self.index.primary_key(self.rtxn)?,
|
||||||
&mut fields_ids_map,
|
&mut fields_ids_map,
|
||||||
alternative_name,
|
alternative_name,
|
||||||
@ -280,7 +291,7 @@ impl Transform<'_, '_> {
|
|||||||
let mut documents_count = 0;
|
let mut documents_count = 0;
|
||||||
|
|
||||||
let mut record = csv::StringRecord::new();
|
let mut record = csv::StringRecord::new();
|
||||||
while csv.read_record(&mut record)? {
|
while csv.read_record(&mut record).map_err(UserError::Csv)? {
|
||||||
obkv_buffer.clear();
|
obkv_buffer.clear();
|
||||||
let mut writer = obkv::KvWriter::new(&mut obkv_buffer);
|
let mut writer = obkv::KvWriter::new(&mut obkv_buffer);
|
||||||
|
|
||||||
@ -297,7 +308,9 @@ impl Transform<'_, '_> {
|
|||||||
// We validate the document id [a-zA-Z0-9\-_].
|
// We validate the document id [a-zA-Z0-9\-_].
|
||||||
match validate_document_id(&external_id) {
|
match validate_document_id(&external_id) {
|
||||||
Some(valid) => valid,
|
Some(valid) => valid,
|
||||||
None => return Err(anyhow!("invalid document id: {:?}", external_id)),
|
None => return Err(UserError::InvalidDocumentId {
|
||||||
|
document_id: Value::from(external_id),
|
||||||
|
}.into()),
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
None => uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer),
|
None => uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer),
|
||||||
@ -315,7 +328,7 @@ impl Transform<'_, '_> {
|
|||||||
for (field_id, field) in iter {
|
for (field_id, field) in iter {
|
||||||
// We serialize the attribute values as JSON strings.
|
// We serialize the attribute values as JSON strings.
|
||||||
json_buffer.clear();
|
json_buffer.clear();
|
||||||
serde_json::to_writer(&mut json_buffer, &field)?;
|
serde_json::to_writer(&mut json_buffer, &field).map_err(InternalError::SerdeJson)?;
|
||||||
writer.insert(*field_id, &json_buffer)?;
|
writer.insert(*field_id, &json_buffer)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -330,10 +343,6 @@ impl Transform<'_, '_> {
|
|||||||
|
|
||||||
// Now that we have a valid sorter that contains the user id and the obkv we
|
// Now that we have a valid sorter that contains the user id and the obkv we
|
||||||
// give it to the last transforming function which returns the TransformOutput.
|
// give it to the last transforming function which returns the TransformOutput.
|
||||||
let primary_key_name = fields_ids_map
|
|
||||||
.name(primary_key_id)
|
|
||||||
.map(String::from)
|
|
||||||
.expect("Primary key must be present in fields id map");
|
|
||||||
self.output_from_sorter(
|
self.output_from_sorter(
|
||||||
sorter,
|
sorter,
|
||||||
primary_key_name,
|
primary_key_name,
|
||||||
@ -347,17 +356,18 @@ impl Transform<'_, '_> {
|
|||||||
/// Generate the `TransformOutput` based on the given sorter that can be generated from any
|
/// Generate the `TransformOutput` based on the given sorter that can be generated from any
|
||||||
/// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document
|
/// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document
|
||||||
/// id for the user side and the value must be an obkv where keys are valid fields ids.
|
/// id for the user side and the value must be an obkv where keys are valid fields ids.
|
||||||
fn output_from_sorter<F>(
|
fn output_from_sorter<F, E>(
|
||||||
self,
|
self,
|
||||||
sorter: grenad::Sorter<MergeFn>,
|
sorter: grenad::Sorter<MergeFn<E>>,
|
||||||
primary_key: String,
|
primary_key: String,
|
||||||
fields_ids_map: FieldsIdsMap,
|
fields_ids_map: FieldsIdsMap,
|
||||||
approximate_number_of_documents: usize,
|
approximate_number_of_documents: usize,
|
||||||
mut external_documents_ids: ExternalDocumentsIds<'_>,
|
mut external_documents_ids: ExternalDocumentsIds<'_>,
|
||||||
progress_callback: F,
|
progress_callback: F,
|
||||||
) -> anyhow::Result<TransformOutput>
|
) -> Result<TransformOutput>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
|
Error: From<E>,
|
||||||
{
|
{
|
||||||
let documents_ids = self.index.documents_ids(self.rtxn)?;
|
let documents_ids = self.index.documents_ids(self.rtxn)?;
|
||||||
let mut fields_distribution = self.index.fields_distribution(self.rtxn)?;
|
let mut fields_distribution = self.index.fields_distribution(self.rtxn)?;
|
||||||
@ -365,7 +375,7 @@ impl Transform<'_, '_> {
|
|||||||
|
|
||||||
// Once we have sort and deduplicated the documents we write them into a final file.
|
// Once we have sort and deduplicated the documents we write them into a final file.
|
||||||
let mut final_sorter = create_sorter(
|
let mut final_sorter = create_sorter(
|
||||||
|_docid, _obkvs| Err(anyhow!("cannot merge two documents")),
|
|_id, _obkvs| Err(InternalError::IndexingMergingKeys { process: "documents" }),
|
||||||
self.chunk_compression_type,
|
self.chunk_compression_type,
|
||||||
self.chunk_compression_level,
|
self.chunk_compression_level,
|
||||||
self.chunk_fusing_shrink_size,
|
self.chunk_fusing_shrink_size,
|
||||||
@ -401,7 +411,10 @@ impl Transform<'_, '_> {
|
|||||||
IndexDocumentsMethod::UpdateDocuments => {
|
IndexDocumentsMethod::UpdateDocuments => {
|
||||||
let key = BEU32::new(docid);
|
let key = BEU32::new(docid);
|
||||||
let base_obkv = self.index.documents.get(&self.rtxn, &key)?
|
let base_obkv = self.index.documents.get(&self.rtxn, &key)?
|
||||||
.context("document not found")?;
|
.ok_or(InternalError::DatabaseMissingEntry {
|
||||||
|
db_name: db_name::DOCUMENTS,
|
||||||
|
key: None,
|
||||||
|
})?;
|
||||||
let update_obkv = obkv::KvReader::new(update_obkv);
|
let update_obkv = obkv::KvReader::new(update_obkv);
|
||||||
merge_two_obkvs(base_obkv, update_obkv, &mut obkv_buffer);
|
merge_two_obkvs(base_obkv, update_obkv, &mut obkv_buffer);
|
||||||
(docid, obkv_buffer.as_slice())
|
(docid, obkv_buffer.as_slice())
|
||||||
@ -412,7 +425,7 @@ impl Transform<'_, '_> {
|
|||||||
// If this user id is new we add it to the external documents ids map
|
// If this user id is new we add it to the external documents ids map
|
||||||
// for new ids and into the list of new documents.
|
// for new ids and into the list of new documents.
|
||||||
let new_docid = available_documents_ids.next()
|
let new_docid = available_documents_ids.next()
|
||||||
.context("no more available documents ids")?;
|
.ok_or(UserError::DocumentLimitReached)?;
|
||||||
new_external_documents_ids_builder.insert(external_id, new_docid as u64)?;
|
new_external_documents_ids_builder.insert(external_id, new_docid as u64)?;
|
||||||
new_documents_ids.insert(new_docid);
|
new_documents_ids.insert(new_docid);
|
||||||
(new_docid, update_obkv)
|
(new_docid, update_obkv)
|
||||||
@ -472,7 +485,7 @@ impl Transform<'_, '_> {
|
|||||||
primary_key: String,
|
primary_key: String,
|
||||||
old_fields_ids_map: FieldsIdsMap,
|
old_fields_ids_map: FieldsIdsMap,
|
||||||
new_fields_ids_map: FieldsIdsMap,
|
new_fields_ids_map: FieldsIdsMap,
|
||||||
) -> anyhow::Result<TransformOutput>
|
) -> Result<TransformOutput>
|
||||||
{
|
{
|
||||||
let fields_distribution = self.index.fields_distribution(self.rtxn)?;
|
let fields_distribution = self.index.fields_distribution(self.rtxn)?;
|
||||||
let external_documents_ids = self.index.external_documents_ids(self.rtxn)?;
|
let external_documents_ids = self.index.external_documents_ids(self.rtxn)?;
|
||||||
@ -532,10 +545,10 @@ fn compute_primary_key_pair(
|
|||||||
fields_ids_map: &mut FieldsIdsMap,
|
fields_ids_map: &mut FieldsIdsMap,
|
||||||
alternative_name: Option<String>,
|
alternative_name: Option<String>,
|
||||||
autogenerate_docids: bool,
|
autogenerate_docids: bool,
|
||||||
) -> anyhow::Result<(FieldId, String)> {
|
) -> Result<(FieldId, String)> {
|
||||||
match primary_key {
|
match primary_key {
|
||||||
Some(primary_key) => {
|
Some(primary_key) => {
|
||||||
let id = fields_ids_map.insert(primary_key).ok_or(anyhow!("Maximum number of fields exceeded"))?;
|
let id = fields_ids_map.insert(primary_key).ok_or(UserError::AttributeLimitReached)?;
|
||||||
Ok((id, primary_key.to_string()))
|
Ok((id, primary_key.to_string()))
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
@ -545,35 +558,17 @@ fn compute_primary_key_pair(
|
|||||||
if !autogenerate_docids {
|
if !autogenerate_docids {
|
||||||
// If there is no primary key in the current document batch, we must
|
// If there is no primary key in the current document batch, we must
|
||||||
// return an error and not automatically generate any document id.
|
// return an error and not automatically generate any document id.
|
||||||
anyhow::bail!("missing primary key")
|
return Err(UserError::MissingPrimaryKey.into());
|
||||||
}
|
}
|
||||||
DEFAULT_PRIMARY_KEY_NAME.to_string()
|
DEFAULT_PRIMARY_KEY_NAME.to_string()
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
let id = fields_ids_map.insert(&name).context("field id limit reached")?;
|
let id = fields_ids_map.insert(&name).ok_or(UserError::AttributeLimitReached)?;
|
||||||
Ok((id, name))
|
Ok((id, name))
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Only the last value associated with an id is kept.
|
|
||||||
fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
|
||||||
obkvs.last().context("no last value").map(|last| last.clone().into_owned())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Merge all the obkvs in the order we see them.
|
|
||||||
fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
|
||||||
let mut iter = obkvs.iter();
|
|
||||||
let first = iter.next().map(|b| b.clone().into_owned()).context("no first value")?;
|
|
||||||
Ok(iter.fold(first, |acc, current| {
|
|
||||||
let first = obkv::KvReader::new(&acc);
|
|
||||||
let second = obkv::KvReader::new(current);
|
|
||||||
let mut buffer = Vec::new();
|
|
||||||
merge_two_obkvs(first, second, &mut buffer);
|
|
||||||
buffer
|
|
||||||
}))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn validate_document_id(document_id: &str) -> Option<&str> {
|
fn validate_document_id(document_id: &str) -> Option<&str> {
|
||||||
let document_id = document_id.trim();
|
let document_id = document_id.trim();
|
||||||
Some(document_id).filter(|id| {
|
Some(document_id).filter(|id| {
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||||
|
use std::result::Result as StdResult;
|
||||||
|
|
||||||
use anyhow::Context;
|
|
||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
use grenad::CompressionType;
|
use grenad::CompressionType;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
@ -9,9 +9,10 @@ use rayon::ThreadPool;
|
|||||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||||
|
|
||||||
use crate::criterion::Criterion;
|
use crate::criterion::Criterion;
|
||||||
|
use crate::error::UserError;
|
||||||
use crate::update::index_documents::{IndexDocumentsMethod, Transform};
|
use crate::update::index_documents::{IndexDocumentsMethod, Transform};
|
||||||
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
|
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
|
||||||
use crate::{FieldsIdsMap, Index};
|
use crate::{FieldsIdsMap, Index, Result};
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
pub enum Setting<T> {
|
pub enum Setting<T> {
|
||||||
@ -33,7 +34,7 @@ impl<T> Setting<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Serialize> Serialize for Setting<T> {
|
impl<T: Serialize> Serialize for Setting<T> {
|
||||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer {
|
fn serialize<S>(&self, serializer: S) -> StdResult<S::Ok, S::Error> where S: Serializer {
|
||||||
match self {
|
match self {
|
||||||
Self::Set(value) => Some(value),
|
Self::Set(value) => Some(value),
|
||||||
// Usually not_set isn't serialized by setting skip_serializing_if field attribute
|
// Usually not_set isn't serialized by setting skip_serializing_if field attribute
|
||||||
@ -43,7 +44,7 @@ impl<T: Serialize> Serialize for Setting<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
|
impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
|
||||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: Deserializer<'de> {
|
fn deserialize<D>(deserializer: D) -> StdResult<Self, D::Error> where D: Deserializer<'de> {
|
||||||
Deserialize::deserialize(deserializer).map(|x| match x {
|
Deserialize::deserialize(deserializer).map(|x| match x {
|
||||||
Some(x) => Self::Set(x),
|
Some(x) => Self::Set(x),
|
||||||
None => Self::Reset, // Reset is forced by sending null value
|
None => Self::Reset, // Reset is forced by sending null value
|
||||||
@ -71,6 +72,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
|||||||
stop_words: Setting<BTreeSet<String>>,
|
stop_words: Setting<BTreeSet<String>>,
|
||||||
distinct_field: Setting<String>,
|
distinct_field: Setting<String>,
|
||||||
synonyms: Setting<HashMap<String, Vec<String>>>,
|
synonyms: Setting<HashMap<String, Vec<String>>>,
|
||||||
|
primary_key: Setting<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||||
@ -97,6 +99,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
stop_words: Setting::NotSet,
|
stop_words: Setting::NotSet,
|
||||||
distinct_field: Setting::NotSet,
|
distinct_field: Setting::NotSet,
|
||||||
synonyms: Setting::NotSet,
|
synonyms: Setting::NotSet,
|
||||||
|
primary_key: Setting::NotSet,
|
||||||
update_id,
|
update_id,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -165,7 +168,15 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()>
|
pub fn reset_primary_key(&mut self) {
|
||||||
|
self.primary_key = Setting::Reset;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_primary_key(&mut self, primary_key: String) {
|
||||||
|
self.primary_key = Setting::Set(primary_key);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep, u64) + Sync
|
F: Fn(UpdateIndexingStep, u64) + Sync
|
||||||
{
|
{
|
||||||
@ -192,7 +203,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// There already has been a document addition, the primary key should be set by now.
|
// There already has been a document addition, the primary key should be set by now.
|
||||||
let primary_key = self.index.primary_key(&self.wtxn)?.context("Index must have a primary key")?;
|
let primary_key = self.index.primary_key(&self.wtxn)?.ok_or(UserError::MissingPrimaryKey)?;
|
||||||
|
|
||||||
// We remap the documents fields based on the new `FieldsIdsMap`.
|
// We remap the documents fields based on the new `FieldsIdsMap`.
|
||||||
let output = transform.remap_index_documents(
|
let output = transform.remap_index_documents(
|
||||||
@ -220,7 +231,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn update_displayed(&mut self) -> anyhow::Result<bool> {
|
fn update_displayed(&mut self) -> Result<bool> {
|
||||||
match self.displayed_fields {
|
match self.displayed_fields {
|
||||||
Setting::Set(ref fields) => {
|
Setting::Set(ref fields) => {
|
||||||
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||||
@ -234,7 +245,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
for name in names.iter() {
|
for name in names.iter() {
|
||||||
fields_ids_map
|
fields_ids_map
|
||||||
.insert(name)
|
.insert(name)
|
||||||
.context("field id limit exceeded")?;
|
.ok_or(UserError::AttributeLimitReached)?;
|
||||||
}
|
}
|
||||||
self.index.put_displayed_fields(self.wtxn, &names)?;
|
self.index.put_displayed_fields(self.wtxn, &names)?;
|
||||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||||
@ -245,13 +256,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
Ok(true)
|
Ok(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn update_distinct_field(&mut self) -> anyhow::Result<bool> {
|
fn update_distinct_field(&mut self) -> Result<bool> {
|
||||||
match self.distinct_field {
|
match self.distinct_field {
|
||||||
Setting::Set(ref attr) => {
|
Setting::Set(ref attr) => {
|
||||||
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||||
fields_ids_map
|
fields_ids_map
|
||||||
.insert(attr)
|
.insert(attr)
|
||||||
.context("field id limit exceeded")?;
|
.ok_or(UserError::AttributeLimitReached)?;
|
||||||
|
|
||||||
self.index.put_distinct_field(self.wtxn, &attr)?;
|
self.index.put_distinct_field(self.wtxn, &attr)?;
|
||||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||||
@ -264,7 +275,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
|
|
||||||
/// Updates the index's searchable attributes. This causes the field map to be recomputed to
|
/// Updates the index's searchable attributes. This causes the field map to be recomputed to
|
||||||
/// reflect the order of the searchable attributes.
|
/// reflect the order of the searchable attributes.
|
||||||
fn update_searchable(&mut self) -> anyhow::Result<bool> {
|
fn update_searchable(&mut self) -> Result<bool> {
|
||||||
match self.searchable_fields {
|
match self.searchable_fields {
|
||||||
Setting::Set(ref fields) => {
|
Setting::Set(ref fields) => {
|
||||||
// every time the searchable attributes are updated, we need to update the
|
// every time the searchable attributes are updated, we need to update the
|
||||||
@ -285,13 +296,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
for name in names.iter() {
|
for name in names.iter() {
|
||||||
new_fields_ids_map
|
new_fields_ids_map
|
||||||
.insert(&name)
|
.insert(&name)
|
||||||
.context("field id limit exceeded")?;
|
.ok_or(UserError::AttributeLimitReached)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (_, name) in old_fields_ids_map.iter() {
|
for (_, name) in old_fields_ids_map.iter() {
|
||||||
new_fields_ids_map
|
new_fields_ids_map
|
||||||
.insert(&name)
|
.insert(&name)
|
||||||
.context("field id limit exceeded")?;
|
.ok_or(UserError::AttributeLimitReached)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.index.put_searchable_fields(self.wtxn, &names)?;
|
self.index.put_searchable_fields(self.wtxn, &names)?;
|
||||||
@ -303,7 +314,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
Ok(true)
|
Ok(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn update_stop_words(&mut self) -> anyhow::Result<bool> {
|
fn update_stop_words(&mut self) -> Result<bool> {
|
||||||
match self.stop_words {
|
match self.stop_words {
|
||||||
Setting::Set(ref stop_words) => {
|
Setting::Set(ref stop_words) => {
|
||||||
let current = self.index.stop_words(self.wtxn)?;
|
let current = self.index.stop_words(self.wtxn)?;
|
||||||
@ -325,7 +336,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn update_synonyms(&mut self) -> anyhow::Result<bool> {
|
fn update_synonyms(&mut self) -> Result<bool> {
|
||||||
match self.synonyms {
|
match self.synonyms {
|
||||||
Setting::Set(ref synonyms) => {
|
Setting::Set(ref synonyms) => {
|
||||||
fn normalize(analyzer: &Analyzer<&[u8]>, text: &str) -> Vec<String> {
|
fn normalize(analyzer: &Analyzer<&[u8]>, text: &str) -> Vec<String> {
|
||||||
@ -383,13 +394,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn update_filterable(&mut self) -> anyhow::Result<()> {
|
fn update_filterable(&mut self) -> Result<()> {
|
||||||
match self.filterable_fields {
|
match self.filterable_fields {
|
||||||
Setting::Set(ref fields) => {
|
Setting::Set(ref fields) => {
|
||||||
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||||
let mut new_facets = HashSet::new();
|
let mut new_facets = HashSet::new();
|
||||||
for name in fields {
|
for name in fields {
|
||||||
fields_ids_map.insert(name).context("field id limit exceeded")?;
|
fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
|
||||||
new_facets.insert(name.clone());
|
new_facets.insert(name.clone());
|
||||||
}
|
}
|
||||||
self.index.put_filterable_fields(self.wtxn, &new_facets)?;
|
self.index.put_filterable_fields(self.wtxn, &new_facets)?;
|
||||||
@ -401,7 +412,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn update_criteria(&mut self) -> anyhow::Result<()> {
|
fn update_criteria(&mut self) -> Result<()> {
|
||||||
match self.criteria {
|
match self.criteria {
|
||||||
Setting::Set(ref fields) => {
|
Setting::Set(ref fields) => {
|
||||||
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||||
@ -409,7 +420,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
for name in fields {
|
for name in fields {
|
||||||
let criterion: Criterion = name.parse()?;
|
let criterion: Criterion = name.parse()?;
|
||||||
if let Some(name) = criterion.field_name() {
|
if let Some(name) = criterion.field_name() {
|
||||||
fields_ids_map.insert(name).context("field id limit exceeded")?;
|
fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
|
||||||
}
|
}
|
||||||
new_criteria.push(criterion);
|
new_criteria.push(criterion);
|
||||||
}
|
}
|
||||||
@ -422,7 +433,32 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute<F>(mut self, progress_callback: F) -> anyhow::Result<()>
|
fn update_primary_key(&mut self) -> Result<()> {
|
||||||
|
match self.primary_key {
|
||||||
|
Setting::Set(ref primary_key) => {
|
||||||
|
if self.index.number_of_documents(&self.wtxn)? == 0 {
|
||||||
|
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||||
|
fields_ids_map.insert(primary_key).ok_or(UserError::AttributeLimitReached)?;
|
||||||
|
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||||
|
self.index.put_primary_key(self.wtxn, primary_key)?;
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(UserError::PrimaryKeyCannotBeChanged.into())
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Setting::Reset => {
|
||||||
|
if self.index.number_of_documents(&self.wtxn)? == 0 {
|
||||||
|
self.index.delete_primary_key(self.wtxn)?;
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(UserError::PrimaryKeyCannotBeReset.into())
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Setting::NotSet => Ok(()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep, u64) + Sync
|
F: Fn(UpdateIndexingStep, u64) + Sync
|
||||||
{
|
{
|
||||||
@ -435,6 +471,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
self.update_filterable()?;
|
self.update_filterable()?;
|
||||||
self.update_distinct_field()?;
|
self.update_distinct_field()?;
|
||||||
self.update_criteria()?;
|
self.update_criteria()?;
|
||||||
|
self.update_primary_key()?;
|
||||||
|
|
||||||
// If there are new faceted fields we indicate that we must reindex as we must
|
// If there are new faceted fields we indicate that we must reindex as we must
|
||||||
// index new fields as facets. It means that the distinct attribute,
|
// index new fields as facets. It means that the distinct attribute,
|
||||||
@ -461,8 +498,9 @@ mod tests {
|
|||||||
use maplit::{btreeset, hashmap, hashset};
|
use maplit::{btreeset, hashmap, hashset};
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
|
|
||||||
use crate::{Criterion, FilterCondition, SearchResult};
|
use crate::error::Error;
|
||||||
use crate::update::{IndexDocuments, UpdateFormat};
|
use crate::update::{IndexDocuments, UpdateFormat};
|
||||||
|
use crate::{Criterion, FilterCondition, SearchResult};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
@ -976,4 +1014,56 @@ mod tests {
|
|||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
FilterCondition::from_str(&rtxn, &index, "toto = 32").unwrap_err();
|
FilterCondition::from_str(&rtxn, &index, "toto = 32").unwrap_err();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn setting_primary_key() {
|
||||||
|
let path = tempfile::tempdir().unwrap();
|
||||||
|
let mut options = EnvOpenOptions::new();
|
||||||
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
|
||||||
|
// Set the primary key settings
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||||
|
builder.set_primary_key(S("mykey"));
|
||||||
|
|
||||||
|
builder.execute(|_, _| ()).unwrap();
|
||||||
|
assert_eq!(index.primary_key(&wtxn).unwrap(), Some("mykey"));
|
||||||
|
|
||||||
|
// Then index some documents with the "mykey" primary key.
|
||||||
|
let content = &br#"[
|
||||||
|
{ "mykey": 1, "name": "kevin", "age": 23 },
|
||||||
|
{ "mykey": 2, "name": "kevina", "age": 21 },
|
||||||
|
{ "mykey": 3, "name": "benoit", "age": 34 },
|
||||||
|
{ "mykey": 4, "name": "bernard", "age": 34 },
|
||||||
|
{ "mykey": 5, "name": "bertrand", "age": 34 },
|
||||||
|
{ "mykey": 6, "name": "bernie", "age": 34 },
|
||||||
|
{ "mykey": 7, "name": "ben", "age": 34 }
|
||||||
|
]"#[..];
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, 1);
|
||||||
|
builder.update_format(UpdateFormat::Json);
|
||||||
|
builder.disable_autogenerate_docids();
|
||||||
|
builder.execute(content, |_, _| ()).unwrap();
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
// We now try to reset the primary key
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||||
|
builder.reset_primary_key();
|
||||||
|
|
||||||
|
let err = builder.execute(|_, _| ()).unwrap_err();
|
||||||
|
assert!(matches!(err, Error::UserError(UserError::PrimaryKeyCannotBeReset)));
|
||||||
|
wtxn.abort().unwrap();
|
||||||
|
|
||||||
|
// But if we clear the database...
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let builder = ClearDocuments::new(&mut wtxn, &index, 0);
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
|
// ...we can change the primary key
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||||
|
builder.set_primary_key(S("myid"));
|
||||||
|
builder.execute(|_, _| ()).unwrap();
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use grenad::CompressionType;
|
use grenad::CompressionType;
|
||||||
use rayon::ThreadPool;
|
use rayon::ThreadPool;
|
||||||
|
|
||||||
use crate::Index;
|
use crate::{Index, Result};
|
||||||
use super::{ClearDocuments, DeleteDocuments, IndexDocuments, Settings, Facets};
|
use super::{ClearDocuments, DeleteDocuments, IndexDocuments, Settings, Facets};
|
||||||
|
|
||||||
pub struct UpdateBuilder<'a> {
|
pub struct UpdateBuilder<'a> {
|
||||||
@ -76,7 +76,7 @@ impl<'a> UpdateBuilder<'a> {
|
|||||||
self,
|
self,
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
) -> anyhow::Result<DeleteDocuments<'t, 'u, 'i>>
|
) -> Result<DeleteDocuments<'t, 'u, 'i>>
|
||||||
{
|
{
|
||||||
DeleteDocuments::new(wtxn, index, self.update_id)
|
DeleteDocuments::new(wtxn, index, self.update_id)
|
||||||
}
|
}
|
||||||
|
@ -5,8 +5,11 @@ use fst::Streamer;
|
|||||||
use grenad::CompressionType;
|
use grenad::CompressionType;
|
||||||
use heed::types::ByteSlice;
|
use heed::types::ByteSlice;
|
||||||
|
|
||||||
|
use crate::Result;
|
||||||
use crate::update::index_documents::WriteMethod;
|
use crate::update::index_documents::WriteMethod;
|
||||||
use crate::update::index_documents::{create_sorter, word_docids_merge, sorter_into_lmdb_database};
|
use crate::update::index_documents::{
|
||||||
|
create_sorter, roaring_bitmap_merge, sorter_into_lmdb_database,
|
||||||
|
};
|
||||||
|
|
||||||
pub struct WordPrefixDocids<'t, 'u, 'i> {
|
pub struct WordPrefixDocids<'t, 'u, 'i> {
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
@ -31,7 +34,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(self) -> anyhow::Result<()> {
|
pub fn execute(self) -> Result<()> {
|
||||||
// Clear the word prefix docids database.
|
// Clear the word prefix docids database.
|
||||||
self.index.word_prefix_docids.clear(self.wtxn)?;
|
self.index.word_prefix_docids.clear(self.wtxn)?;
|
||||||
|
|
||||||
@ -40,7 +43,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
|
|||||||
// It is forbidden to keep a mutable reference into the database
|
// It is forbidden to keep a mutable reference into the database
|
||||||
// and write into it at the same time, therefore we write into another file.
|
// and write into it at the same time, therefore we write into another file.
|
||||||
let mut prefix_docids_sorter = create_sorter(
|
let mut prefix_docids_sorter = create_sorter(
|
||||||
word_docids_merge,
|
roaring_bitmap_merge,
|
||||||
self.chunk_compression_type,
|
self.chunk_compression_type,
|
||||||
self.chunk_compression_level,
|
self.chunk_compression_level,
|
||||||
self.chunk_fusing_shrink_size,
|
self.chunk_fusing_shrink_size,
|
||||||
@ -66,7 +69,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
|
|||||||
self.wtxn,
|
self.wtxn,
|
||||||
*self.index.word_prefix_docids.as_polymorph(),
|
*self.index.word_prefix_docids.as_polymorph(),
|
||||||
prefix_docids_sorter,
|
prefix_docids_sorter,
|
||||||
word_docids_merge,
|
roaring_bitmap_merge,
|
||||||
WriteMethod::Append,
|
WriteMethod::Append,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
@ -7,11 +7,11 @@ use heed::BytesEncode;
|
|||||||
use heed::types::ByteSlice;
|
use heed::types::ByteSlice;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
|
|
||||||
use crate::Index;
|
use crate::{Index, Result};
|
||||||
use crate::heed_codec::StrStrU8Codec;
|
use crate::heed_codec::StrStrU8Codec;
|
||||||
use crate::update::index_documents::{
|
use crate::update::index_documents::{
|
||||||
WriteMethod, create_sorter, sorter_into_lmdb_database,
|
WriteMethod, create_sorter, sorter_into_lmdb_database,
|
||||||
words_pairs_proximities_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
pub struct WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
||||||
@ -41,7 +41,7 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(self) -> anyhow::Result<()> {
|
pub fn execute(self) -> Result<()> {
|
||||||
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
||||||
|
|
||||||
self.index.word_prefix_pair_proximity_docids.clear(self.wtxn)?;
|
self.index.word_prefix_pair_proximity_docids.clear(self.wtxn)?;
|
||||||
@ -50,7 +50,7 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
|
|
||||||
// Here we create a sorter akin to the previous one.
|
// Here we create a sorter akin to the previous one.
|
||||||
let mut word_prefix_pair_proximity_docids_sorter = create_sorter(
|
let mut word_prefix_pair_proximity_docids_sorter = create_sorter(
|
||||||
words_pairs_proximities_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
self.chunk_compression_type,
|
self.chunk_compression_type,
|
||||||
self.chunk_compression_level,
|
self.chunk_compression_level,
|
||||||
self.chunk_fusing_shrink_size,
|
self.chunk_fusing_shrink_size,
|
||||||
@ -80,7 +80,7 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
self.wtxn,
|
self.wtxn,
|
||||||
*self.index.word_prefix_pair_proximity_docids.as_polymorph(),
|
*self.index.word_prefix_pair_proximity_docids.as_polymorph(),
|
||||||
word_prefix_pair_proximity_docids_sorter,
|
word_prefix_pair_proximity_docids_sorter,
|
||||||
words_pairs_proximities_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
WriteMethod::Append,
|
WriteMethod::Append,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
@ -11,11 +11,13 @@ use heed::{BytesEncode, Error};
|
|||||||
use log::debug;
|
use log::debug;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::error::InternalError;
|
||||||
use crate::heed_codec::{StrLevelPositionCodec, CboRoaringBitmapCodec};
|
use crate::heed_codec::{StrLevelPositionCodec, CboRoaringBitmapCodec};
|
||||||
|
use crate::Result;
|
||||||
use crate::update::index_documents::WriteMethod;
|
use crate::update::index_documents::WriteMethod;
|
||||||
use crate::update::index_documents::{
|
use crate::update::index_documents::{
|
||||||
create_writer, create_sorter, writer_into_reader, write_into_lmdb_database,
|
create_writer, create_sorter, writer_into_reader, write_into_lmdb_database,
|
||||||
word_prefix_level_positions_docids_merge, sorter_into_lmdb_database
|
cbo_roaring_bitmap_merge, sorter_into_lmdb_database
|
||||||
};
|
};
|
||||||
use crate::{Index, TreeLevel};
|
use crate::{Index, TreeLevel};
|
||||||
|
|
||||||
@ -56,7 +58,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(self) -> anyhow::Result<()> {
|
pub fn execute(self) -> Result<()> {
|
||||||
debug!("Computing and writing the word levels positions docids into LMDB on disk...");
|
debug!("Computing and writing the word levels positions docids into LMDB on disk...");
|
||||||
|
|
||||||
let entries = compute_positions_levels(
|
let entries = compute_positions_levels(
|
||||||
@ -78,7 +80,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> {
|
|||||||
self.wtxn,
|
self.wtxn,
|
||||||
*self.index.word_level_position_docids.as_polymorph(),
|
*self.index.word_level_position_docids.as_polymorph(),
|
||||||
entries,
|
entries,
|
||||||
|_, _| anyhow::bail!("invalid word level position merging"),
|
|_, _| Err(InternalError::IndexingMergingKeys { process: "word level position" }),
|
||||||
WriteMethod::Append,
|
WriteMethod::Append,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
@ -86,7 +88,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> {
|
|||||||
self.index.word_prefix_level_position_docids.clear(self.wtxn)?;
|
self.index.word_prefix_level_position_docids.clear(self.wtxn)?;
|
||||||
|
|
||||||
let mut word_prefix_level_positions_docids_sorter = create_sorter(
|
let mut word_prefix_level_positions_docids_sorter = create_sorter(
|
||||||
word_prefix_level_positions_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
self.chunk_compression_type,
|
self.chunk_compression_type,
|
||||||
self.chunk_compression_level,
|
self.chunk_compression_level,
|
||||||
self.chunk_fusing_shrink_size,
|
self.chunk_fusing_shrink_size,
|
||||||
@ -119,7 +121,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> {
|
|||||||
self.wtxn,
|
self.wtxn,
|
||||||
*self.index.word_prefix_level_position_docids.as_polymorph(),
|
*self.index.word_prefix_level_position_docids.as_polymorph(),
|
||||||
word_prefix_level_positions_docids_sorter,
|
word_prefix_level_positions_docids_sorter,
|
||||||
word_prefix_level_positions_docids_merge,
|
cbo_roaring_bitmap_merge,
|
||||||
WriteMethod::Append,
|
WriteMethod::Append,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
@ -142,7 +144,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> {
|
|||||||
self.wtxn,
|
self.wtxn,
|
||||||
*self.index.word_prefix_level_position_docids.as_polymorph(),
|
*self.index.word_prefix_level_position_docids.as_polymorph(),
|
||||||
entries,
|
entries,
|
||||||
|_, _| anyhow::bail!("invalid word prefix level position merging"),
|
|_, _| Err(InternalError::IndexingMergingKeys { process: "word prefix level position" }),
|
||||||
WriteMethod::Append,
|
WriteMethod::Append,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
@ -174,7 +176,7 @@ fn compute_positions_levels(
|
|||||||
shrink_size: Option<u64>,
|
shrink_size: Option<u64>,
|
||||||
level_group_size: NonZeroU32,
|
level_group_size: NonZeroU32,
|
||||||
min_level_size: NonZeroU32,
|
min_level_size: NonZeroU32,
|
||||||
) -> anyhow::Result<Reader<FileFuse>>
|
) -> Result<Reader<FileFuse>>
|
||||||
{
|
{
|
||||||
// It is forbidden to keep a cursor and write in a database at the same time with LMDB
|
// It is forbidden to keep a cursor and write in a database at the same time with LMDB
|
||||||
// therefore we write the facet levels entries into a grenad file before transferring them.
|
// therefore we write the facet levels entries into a grenad file before transferring them.
|
||||||
@ -251,7 +253,7 @@ fn write_level_entry(
|
|||||||
left: u32,
|
left: u32,
|
||||||
right: u32,
|
right: u32,
|
||||||
ids: &RoaringBitmap,
|
ids: &RoaringBitmap,
|
||||||
) -> anyhow::Result<()>
|
) -> Result<()>
|
||||||
{
|
{
|
||||||
let key = (word, level, left, right);
|
let key = (word, level, left, right);
|
||||||
let key = StrLevelPositionCodec::bytes_encode(&key).ok_or(Error::Encoding)?;
|
let key = StrLevelPositionCodec::bytes_encode(&key).ok_or(Error::Encoding)?;
|
||||||
|
@ -2,7 +2,7 @@ use std::iter::FromIterator;
|
|||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
use fst::Streamer;
|
use fst::Streamer;
|
||||||
use crate::{Index, SmallString32};
|
use crate::{Index, SmallString32, Result};
|
||||||
|
|
||||||
pub struct WordsPrefixesFst<'t, 'u, 'i> {
|
pub struct WordsPrefixesFst<'t, 'u, 'i> {
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
@ -48,7 +48,7 @@ impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(self) -> anyhow::Result<()> {
|
pub fn execute(self) -> Result<()> {
|
||||||
let words_fst = self.index.words_fst(&self.wtxn)?;
|
let words_fst = self.index.words_fst(&self.wtxn)?;
|
||||||
let number_of_words = words_fst.len();
|
let number_of_words = words_fst.len();
|
||||||
let min_number_of_words = (number_of_words as f64 * self.threshold) as usize;
|
let min_number_of_words = (number_of_words as f64 * self.threshold) as usize;
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
use milli::{Search, SearchResult, Criterion};
|
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
|
use milli::update::Settings;
|
||||||
|
use milli::{Search, SearchResult, Criterion};
|
||||||
|
|
||||||
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
||||||
use Criterion::*;
|
use Criterion::*;
|
||||||
@ -189,7 +190,9 @@ fn criteria_mixup() {
|
|||||||
eprintln!("Testing with criteria order: {:?}", &criteria);
|
eprintln!("Testing with criteria order: {:?}", &criteria);
|
||||||
//update criteria
|
//update criteria
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
index.put_criteria(&mut wtxn, &criteria).unwrap();
|
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||||
|
builder.set_criteria(criteria.iter().map(ToString::to_string).collect());
|
||||||
|
builder.execute(|_, _| ()).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
let mut rtxn = index.read_txn().unwrap();
|
let mut rtxn = index.read_txn().unwrap();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user