mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-26 13:17:30 +01:00
Remove legacy source code
This commit is contained in:
parent
381e07b7b6
commit
abca68bf24
@ -1,5 +0,0 @@
|
||||
target
|
||||
Dockerfile
|
||||
.dockerignore
|
||||
.git
|
||||
.gitignore
|
8
.gitignore
vendored
8
.gitignore
vendored
@ -1,8 +0,0 @@
|
||||
/target
|
||||
meilisearch-core/target
|
||||
**/*.csv
|
||||
**/*.json_lines
|
||||
**/*.rs.bk
|
||||
/*.mdb
|
||||
/query-history.txt
|
||||
/data.ms
|
126
CHANGELOG.md
126
CHANGELOG.md
@ -1,126 +0,0 @@
|
||||
## v0.20.0 - 2021-03-22
|
||||
|
||||
- Fix build on mac M1 (#1280)
|
||||
- Server root returns 200 in production (#1292)
|
||||
- Healthcheck returns 200 (#1291)
|
||||
- Snapshot temporary files are not created in /tmp anymore (#1238)
|
||||
|
||||
## v0.19.0 - 2021-02-09
|
||||
|
||||
- The snapshots are now created and then renamed atomically (#1172)
|
||||
- Fix a race condition when an update and a document addition are processed immediately one after the other (#1176)
|
||||
- Latin synonyms are normalized during indexation (#1174)
|
||||
|
||||
## v0.18.1 - 2021-01-14
|
||||
|
||||
- Fix unexpected CORS error (#1185)
|
||||
|
||||
## v0.18.0 - 2021-01-11
|
||||
|
||||
- Integration with the new tokenizer (#1091)
|
||||
- Fix setting consistency bug (#1128)
|
||||
- Fix attributes to retrieve bug (#1131)
|
||||
- Increase default payload size (#1147)
|
||||
- Improvements to code quality (#1167, #1165, #1126, #1151)
|
||||
|
||||
## v0.17.0 - 2020-11-30
|
||||
- Fix corrupted data during placeholder search (#1089)
|
||||
- Remove maintenance error from http (#1082)
|
||||
- Disable frontend in production (#1097)
|
||||
- Update nbHits count with filtered documents (#849)
|
||||
- Remove update changelog ci check (#1090)
|
||||
- Add deploy on Platform.sh option to README (#1087)
|
||||
- Change movie gifs in README (#1077)
|
||||
- Remove some clippy warnings (#1100)
|
||||
- Improve script `download-latest.sh` (#1054)
|
||||
- Bump dependencies version (#1056, #1057, #1059)
|
||||
|
||||
## v0.16.0 - 2020-11-02
|
||||
|
||||
- Automatically create index on document push if index doesn't exist (#914)
|
||||
- Sort displayedAttributes and facetDistribution (#946)
|
||||
|
||||
## v0.15.0 - 2020-09-30
|
||||
|
||||
- Update actix-web dependency to 3.0.0 (#963)
|
||||
- Consider an empty query to be a placeholder search (#916)
|
||||
|
||||
## v0.14.1
|
||||
|
||||
- Fix version mismatch in snapshot importation (#959)
|
||||
|
||||
## v0.14.0
|
||||
|
||||
- Sort displayedAttributes (#943)
|
||||
- Fix facet distribution case (#797)
|
||||
- Snapshotting (#839)
|
||||
- Fix bucket-sort unwrap bug (#915)
|
||||
|
||||
## v0.13.0
|
||||
|
||||
- placeholder search (#771)
|
||||
- Add database version mismatch check (#794)
|
||||
- Displayed and searchable attributes wildcard (#846)
|
||||
- Remove sys-info route (#810)
|
||||
- Check database version mismatch (#794)
|
||||
- Fix unique docid bug (#841)
|
||||
- Error codes in updates (#792)
|
||||
- Sentry disable argument (#813)
|
||||
- Log analytics if enabled (#825)
|
||||
- Fix default values displayed on web interface (#874)
|
||||
|
||||
## v0.12.0
|
||||
|
||||
- Fix long documents not being indexed completely bug (#816)
|
||||
- Fix distinct attribute returning id instead of name (#800)
|
||||
- error code rename (#805)
|
||||
|
||||
## v0.11.1
|
||||
|
||||
- Fix facet cache on document update (#789)
|
||||
- Improvements on settings consistency (#778)
|
||||
|
||||
## v0.11.0
|
||||
|
||||
- Change the HTTP framework, moving from tide to actix-web (#601)
|
||||
- Bump sentry version to 0.18.1 (#690)
|
||||
- Enable max payload size override (#684)
|
||||
- Disable sentry in debug (#681)
|
||||
- Better terminal greeting (#680)
|
||||
- Fix highlight misalignment (#679)
|
||||
- Add support for facet count (#676)
|
||||
- Add support for faceted search (#631)
|
||||
- Add support for configuring the lmdb map size (#646, #647)
|
||||
- Add exposed port for Dockerfile (#654)
|
||||
- Add sentry probe (#664)
|
||||
- Fix url trailing slash and double slash issues (#659)
|
||||
- Fix accept all Content-Type by default (#653)
|
||||
- Return the error message from Serde when a deserialization error is encountered (#661)
|
||||
- Fix NormalizePath middleware to make the dashboard accessible (#695)
|
||||
- Update sentry features to remove openssl (#702)
|
||||
- Add SSL support (#669)
|
||||
- Rename fieldsFrequency into fieldsDistribution in stats (#719)
|
||||
- Add support for error code reporting (#703)
|
||||
- Allow the dashboard to query private servers (#732)
|
||||
- Add telemetry (#720)
|
||||
- Add post route for search (#735)
|
||||
|
||||
## v0.10.1
|
||||
|
||||
- Add support for floating points in filters (#640)
|
||||
- Add '@' character as tokenizer separator (#607)
|
||||
- Add support for filtering on arrays of strings (#611)
|
||||
|
||||
## v0.10.0
|
||||
|
||||
- Refined filtering (#592)
|
||||
- Add the number of hits in search result (#541)
|
||||
- Add support for aligned crop in search result (#543)
|
||||
- Sanitize the content displayed in the web interface (#539)
|
||||
- Add support of nested null, boolean and seq values (#571 and #568, #574)
|
||||
- Fixed the core benchmark (#576)
|
||||
- Publish ARMv7 and ARMv8 binaries on releases (#540 and #581)
|
||||
- Fixed a bug where the result of the update status after the first update was empty (#542)
|
||||
- Fixed a bug where stop words were not handled correctly (#594)
|
||||
- Fix CORS issues (#602)
|
||||
- Support wildcard on attributes to retrieve, highlight, and crop (#549, #565, and #598)
|
3662
Cargo.lock
generated
3662
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
10
Cargo.toml
10
Cargo.toml
@ -1,10 +0,0 @@
|
||||
[workspace]
|
||||
members = [
|
||||
"meilisearch-core",
|
||||
"meilisearch-http",
|
||||
"meilisearch-schema",
|
||||
"meilisearch-types",
|
||||
]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
29
Dockerfile
29
Dockerfile
@ -1,29 +0,0 @@
|
||||
# Compile stage: builds the release binary with a rustup-installed toolchain.
FROM alpine:3.10 AS compiler

# `--no-cache` fetches the package index on the fly, so no separate
# `apk update` layer (and no stale index) is kept in the image.
RUN apk add --quiet --no-cache curl build-base

RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

WORKDIR /meilisearch

COPY . .

ENV RUSTFLAGS="-C target-feature=-crt-static"

RUN $HOME/.cargo/bin/cargo build --release

# Run stage: only the compiled binary and its runtime libraries are shipped.
FROM alpine:3.10

RUN apk add -q --no-cache libgcc tini

COPY --from=compiler /meilisearch/target/release/meilisearch .

ENV MEILI_HTTP_ADDR=0.0.0.0:7700
EXPOSE 7700/tcp

ENTRYPOINT ["tini", "--"]
# Exec form: tini starts the binary directly instead of wrapping it in
# `/bin/sh -c`, so signals forwarded by tini reach the server process itself.
CMD ["./meilisearch"]
|
@ -1,3 +0,0 @@
|
||||
status = ["Test on macos-latest", "Test on ubuntu-18.04"]
|
||||
# 4 hours timeout
|
||||
timeout-sec = 14400
|
38
bump.sh
38
bump.sh
@ -1,38 +0,0 @@
|
||||
#!/usr/bin/env bash
#
# Bump the version of the meilisearch crates found in the `.toml` files,
# then run `cargo check` to make sure the workspace still builds.
#
# Usage: ./bump.sh NEW_VERSION

NEW_VERSION=$1

if [ -z "$NEW_VERSION" ]
then
    echo "error: a version number must be provided"
    exit 1
fi

# find current version: the first x.y.z found on a line that mentions
# both "meilisearch" and "version"
CURRENT_VERSION=$(cat **/*.toml | grep meilisearch | grep version | sed 's/.*\([0-9]\+\.[0-9]\+\.[0-9]\+\).*/\1/' | sed "1q;d")

# without a current version the substitution below would match nothing useful
if [ -z "$CURRENT_VERSION" ]
then
    echo "error: could not find the current version in the .toml files"
    exit 1
fi

# bump all version in .toml
echo "bumping from version $CURRENT_VERSION to version $NEW_VERSION"
while true
do
    # `read` fails on end-of-input; abort instead of looping forever
    if ! read -r -p "Continue (y/n)?" choice
    then
        echo "aborting bump"
        exit 1
    fi
    case "$choice" in
        y|Y ) break;;
        n|N ) echo "aborting bump" && exit 0;;
        * ) echo "invalid choice";;
    esac
done

# update all crate version
# the dots of the version are escaped so that they only match literal dots
ESCAPED_VERSION=$(printf '%s' "$CURRENT_VERSION" | sed 's/\./\\./g')
sed -i "s/version = \"$ESCAPED_VERSION\"/version = \"$NEW_VERSION\"/" **/*.toml

printf "running cargo check: "

if ! CARGO_CHECK=$(cargo check 2>&1)
then
    printf "\033[31;1m FAIL \033[0m\n"
    # print the output as data: it may contain `%` or backslash sequences
    printf '%s\n' "$CARGO_CHECK"
    exit 1
fi
printf "\033[32;1m OK \033[0m\n"
|
@ -1,53 +0,0 @@
|
||||
[package]
|
||||
name = "meilisearch-core"
|
||||
version = "0.20.0"
|
||||
license = "MIT"
|
||||
authors = ["Kerollmops <clement@meilisearch.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
arc-swap = "1.2.0"
|
||||
bincode = "1.3.1"
|
||||
byteorder = "1.3.4"
|
||||
chrono = { version = "0.4.19", features = ["serde"] }
|
||||
compact_arena = "0.4.1"
|
||||
cow-utils = "0.1.2"
|
||||
crossbeam-channel = "0.5.0"
|
||||
deunicode = "1.1.1"
|
||||
either = "1.6.1"
|
||||
env_logger = "0.8.2"
|
||||
fst = "0.4.5"
|
||||
hashbrown = { version = "0.9.1", features = ["serde"] }
|
||||
heed = "0.10.6"
|
||||
indexmap = { version = "1.6.1", features = ["serde-1"] }
|
||||
intervaltree = "0.2.6"
|
||||
itertools = "0.10.0"
|
||||
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
||||
log = "0.4.11"
|
||||
meilisearch-error = { path = "../meilisearch-error", version = "0.20.0" }
|
||||
meilisearch-schema = { path = "../meilisearch-schema", version = "0.20.0" }
|
||||
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.1.3" }
|
||||
meilisearch-types = { path = "../meilisearch-types", version = "0.20.0" }
|
||||
once_cell = "1.5.2"
|
||||
ordered-float = { version = "2.0.1", features = ["serde"] }
|
||||
pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" }
|
||||
pest_derive = "2.1.0"
|
||||
regex = "1.4.2"
|
||||
sdset = "0.4.0"
|
||||
serde = { version = "1.0.118", features = ["derive"] }
|
||||
serde_json = { version = "1.0.61", features = ["preserve_order"] }
|
||||
slice-group-by = "0.2.6"
|
||||
unicase = "2.6.0"
|
||||
zerocopy = "0.3.0"
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = "1.4.0"
|
||||
criterion = "0.3.3"
|
||||
csv = "1.1.5"
|
||||
rustyline = { version = "7.1.0", default-features = false }
|
||||
structopt = "0.3.21"
|
||||
tempfile = "3.1.0"
|
||||
termcolor = "1.1.2"
|
||||
|
||||
[target.'cfg(unix)'.dev-dependencies]
|
||||
jemallocator = "0.3.2"
|
@ -1,473 +0,0 @@
|
||||
use std::collections::HashSet;
|
||||
use std::collections::btree_map::{BTreeMap, Entry};
|
||||
use std::error::Error;
|
||||
use std::io::{Read, Write};
|
||||
use std::iter::FromIterator;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::{Duration, Instant};
|
||||
use std::{fs, io, sync::mpsc};
|
||||
|
||||
use rustyline::{Config, Editor};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use structopt::StructOpt;
|
||||
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
||||
|
||||
use meilisearch_core::{Database, DatabaseOptions, Highlight, ProcessedUpdateResult};
|
||||
use meilisearch_core::settings::Settings;
|
||||
use meilisearch_schema::FieldId;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[global_allocator]
|
||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
struct IndexCommand {
|
||||
/// The destination where the database must be created.
|
||||
#[structopt(parse(from_os_str))]
|
||||
database_path: PathBuf,
|
||||
|
||||
#[structopt(long, default_value = "default")]
|
||||
index_uid: String,
|
||||
|
||||
/// The csv file path to index, you can also use `-` to specify the standard input.
|
||||
#[structopt(parse(from_os_str))]
|
||||
csv_data_path: PathBuf,
|
||||
|
||||
/// The path to the settings.
|
||||
#[structopt(long, parse(from_os_str))]
|
||||
settings: PathBuf,
|
||||
|
||||
#[structopt(long)]
|
||||
update_group_size: Option<usize>,
|
||||
|
||||
#[structopt(long, parse(from_os_str))]
|
||||
compact_to_path: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
struct SearchCommand {
|
||||
/// The path of the database to work with.
|
||||
#[structopt(parse(from_os_str))]
|
||||
database_path: PathBuf,
|
||||
|
||||
#[structopt(long, default_value = "default")]
|
||||
index_uid: String,
|
||||
|
||||
/// Timeout after which the search will return results.
|
||||
#[structopt(long)]
|
||||
fetch_timeout_ms: Option<u64>,
|
||||
|
||||
/// The number of returned results
|
||||
#[structopt(short, long, default_value = "10")]
|
||||
number_results: usize,
|
||||
|
||||
/// The number of characters before and after the first match
|
||||
#[structopt(short = "C", long, default_value = "35")]
|
||||
char_context: usize,
|
||||
|
||||
/// A filter string that can be `!adult` or `adult` to
|
||||
/// filter documents on this specfied field
|
||||
#[structopt(short, long)]
|
||||
filter: Option<String>,
|
||||
|
||||
/// Fields that must be displayed.
|
||||
displayed_fields: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
struct ShowUpdatesCommand {
|
||||
/// The path of the database to work with.
|
||||
#[structopt(parse(from_os_str))]
|
||||
database_path: PathBuf,
|
||||
|
||||
#[structopt(long, default_value = "default")]
|
||||
index_uid: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
enum Command {
|
||||
Index(IndexCommand),
|
||||
Search(SearchCommand),
|
||||
ShowUpdates(ShowUpdatesCommand),
|
||||
}
|
||||
|
||||
impl Command {
|
||||
fn path(&self) -> &Path {
|
||||
match self {
|
||||
Command::Index(command) => &command.database_path,
|
||||
Command::Search(command) => &command.database_path,
|
||||
Command::ShowUpdates(command) => &command.database_path,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(transparent)]
|
||||
struct Document(indexmap::IndexMap<String, String>);
|
||||
|
||||
fn index_command(command: IndexCommand, database: Database) -> Result<(), Box<dyn Error>> {
|
||||
let start = Instant::now();
|
||||
|
||||
let (sender, receiver) = mpsc::sync_channel(100);
|
||||
let update_fn =
|
||||
move |_name: &str, update: ProcessedUpdateResult| sender.send(update.update_id).unwrap();
|
||||
let index = match database.open_index(&command.index_uid) {
|
||||
Some(index) => index,
|
||||
None => database.create_index(&command.index_uid).unwrap(),
|
||||
};
|
||||
|
||||
database.set_update_callback(Box::new(update_fn));
|
||||
|
||||
let db = &database;
|
||||
|
||||
let settings = {
|
||||
let string = fs::read_to_string(&command.settings)?;
|
||||
let settings: Settings = serde_json::from_str(&string).unwrap();
|
||||
settings.to_update().unwrap()
|
||||
};
|
||||
|
||||
db.update_write(|w| index.settings_update(w, settings))?;
|
||||
|
||||
let mut rdr = if command.csv_data_path.as_os_str() == "-" {
|
||||
csv::Reader::from_reader(Box::new(io::stdin()) as Box<dyn Read>)
|
||||
} else {
|
||||
let file = std::fs::File::open(command.csv_data_path)?;
|
||||
csv::Reader::from_reader(Box::new(file) as Box<dyn Read>)
|
||||
};
|
||||
|
||||
let mut raw_record = csv::StringRecord::new();
|
||||
let headers = rdr.headers()?.clone();
|
||||
|
||||
let mut max_update_id = 0;
|
||||
let mut i = 0;
|
||||
let mut end_of_file = false;
|
||||
|
||||
while !end_of_file {
|
||||
let mut additions = index.documents_addition();
|
||||
|
||||
loop {
|
||||
end_of_file = !rdr.read_record(&mut raw_record)?;
|
||||
if end_of_file {
|
||||
break;
|
||||
}
|
||||
|
||||
let document: Document = match raw_record.deserialize(Some(&headers)) {
|
||||
Ok(document) => document,
|
||||
Err(e) => {
|
||||
eprintln!("{:?}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
additions.update_document(document);
|
||||
|
||||
print!("\rindexing document {}", i);
|
||||
i += 1;
|
||||
|
||||
if let Some(group_size) = command.update_group_size {
|
||||
if i % group_size == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!();
|
||||
|
||||
let update_id = db.update_write(|w| additions.finalize(w))?;
|
||||
|
||||
println!("committing update...");
|
||||
max_update_id = max_update_id.max(update_id);
|
||||
println!("committed update {}", update_id);
|
||||
}
|
||||
|
||||
println!("Waiting for update {}", max_update_id);
|
||||
for id in receiver {
|
||||
if id == max_update_id {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
println!(
|
||||
"database created in {:.2?} at: {:?}",
|
||||
start.elapsed(),
|
||||
command.database_path
|
||||
);
|
||||
|
||||
if let Some(path) = command.compact_to_path {
|
||||
fs::create_dir_all(&path)?;
|
||||
let start = Instant::now();
|
||||
let _file = database.copy_and_compact_to_path(path.join("data.mdb"))?;
|
||||
println!(
|
||||
"database compacted in {:.2?} at: {:?}",
|
||||
start.elapsed(),
|
||||
path
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
|
||||
let mut stdout = StandardStream::stdout(ColorChoice::Always);
|
||||
let mut highlighted = false;
|
||||
|
||||
for range in ranges.windows(2) {
|
||||
let [start, end] = match range {
|
||||
[start, end] => [*start, *end],
|
||||
_ => unreachable!(),
|
||||
};
|
||||
if highlighted {
|
||||
stdout.set_color(
|
||||
ColorSpec::new()
|
||||
.set_fg(Some(Color::Yellow))
|
||||
.set_underline(true),
|
||||
)?;
|
||||
}
|
||||
write!(&mut stdout, "{}", &text[start..end])?;
|
||||
stdout.reset()?;
|
||||
highlighted = !highlighted;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Converts a range expressed in characters into a range expressed in bytes.
///
/// `index` and `length` are counted in `char`s of `text`. The returned tuple
/// is `(byte_index, byte_length)`, suitable to slice `text`.
///
/// Positions that lie past the end of `text` are clamped to `text.len()`:
/// a range that overflows the text stops at its end, and an empty range
/// always has a byte length of zero.
fn char_to_byte_range(index: usize, length: usize, text: &str) -> (usize, usize) {
    // byte offset of the `char_pos`-th character, or the end of the text
    let byte_offset = |char_pos: usize| {
        text.char_indices()
            .nth(char_pos)
            .map_or(text.len(), |(offset, _)| offset)
    };

    let start = byte_offset(index);
    // `end` cannot be smaller than `start` as it is looked up further in the text
    let end = byte_offset(index.saturating_add(length));

    (start, end - start)
}
|
||||
|
||||
fn create_highlight_areas(text: &str, highlights: &[Highlight]) -> Vec<usize> {
|
||||
let mut byte_indexes = BTreeMap::new();
|
||||
|
||||
for highlight in highlights {
|
||||
let char_index = highlight.char_index as usize;
|
||||
let char_length = highlight.char_length as usize;
|
||||
let (byte_index, byte_length) = char_to_byte_range(char_index, char_length, text);
|
||||
|
||||
match byte_indexes.entry(byte_index) {
|
||||
Entry::Vacant(entry) => {
|
||||
entry.insert(byte_length);
|
||||
}
|
||||
Entry::Occupied(mut entry) => {
|
||||
if *entry.get() < byte_length {
|
||||
entry.insert(byte_length);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut title_areas = Vec::new();
|
||||
title_areas.push(0);
|
||||
for (byte_index, length) in byte_indexes {
|
||||
title_areas.push(byte_index);
|
||||
title_areas.push(byte_index + length);
|
||||
}
|
||||
title_areas.push(text.len());
|
||||
title_areas.sort_unstable();
|
||||
title_areas
|
||||
}
|
||||
|
||||
/// note: matches must have been sorted by `char_index` and `char_length` before being passed.
|
||||
///
|
||||
/// ```no_run
|
||||
/// matches.sort_unstable_by_key(|m| (m.char_index, m.char_length));
|
||||
///
|
||||
/// let matches = matches.matches.iter().filter(|m| SchemaAttr::new(m.attribute) == attr).cloned();
|
||||
///
|
||||
/// let (text, matches) = crop_text(&text, matches, 35);
|
||||
/// ```
|
||||
fn crop_text(
|
||||
text: &str,
|
||||
highlights: impl IntoIterator<Item = Highlight>,
|
||||
context: usize,
|
||||
) -> (String, Vec<Highlight>) {
|
||||
let mut highlights = highlights.into_iter().peekable();
|
||||
|
||||
let char_index = highlights
|
||||
.peek()
|
||||
.map(|m| m.char_index as usize)
|
||||
.unwrap_or(0);
|
||||
let start = char_index.saturating_sub(context);
|
||||
let text = text.chars().skip(start).take(context * 2).collect();
|
||||
|
||||
let highlights = highlights
|
||||
.take_while(|m| (m.char_index as usize) + (m.char_length as usize) <= start + (context * 2))
|
||||
.map(|highlight| Highlight {
|
||||
char_index: highlight.char_index - start as u16,
|
||||
..highlight
|
||||
})
|
||||
.collect();
|
||||
|
||||
(text, highlights)
|
||||
}
|
||||
|
||||
fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<dyn Error>> {
|
||||
let db = &database;
|
||||
let index = database
|
||||
.open_index(&command.index_uid)
|
||||
.expect("Could not find index");
|
||||
|
||||
let reader = db.main_read_txn().unwrap();
|
||||
let schema = index.main.schema(&reader)?;
|
||||
reader.abort().unwrap();
|
||||
|
||||
let schema = schema.ok_or(meilisearch_core::Error::SchemaMissing)?;
|
||||
|
||||
let fields = command
|
||||
.displayed_fields
|
||||
.iter()
|
||||
.map(String::as_str)
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
let config = Config::builder().auto_add_history(true).build();
|
||||
let mut readline = Editor::<()>::with_config(config);
|
||||
let _ = readline.load_history("query-history.txt");
|
||||
|
||||
for result in readline.iter("Searching for: ") {
|
||||
match result {
|
||||
Ok(query) => {
|
||||
let start_total = Instant::now();
|
||||
|
||||
let reader = db.main_read_txn().unwrap();
|
||||
let ref_index = &index;
|
||||
let ref_reader = &reader;
|
||||
|
||||
let mut builder = index.query_builder();
|
||||
if let Some(timeout) = command.fetch_timeout_ms {
|
||||
builder.with_fetch_timeout(Duration::from_millis(timeout));
|
||||
}
|
||||
|
||||
if let Some(ref filter) = command.filter {
|
||||
let filter = filter.as_str();
|
||||
let (positive, filter) = if let Some(stripped) = filter.strip_prefix('!') {
|
||||
(false, stripped)
|
||||
} else {
|
||||
(true, filter)
|
||||
};
|
||||
|
||||
let attr = schema
|
||||
.id(filter)
|
||||
.expect("Could not find filtered attribute");
|
||||
|
||||
builder.with_filter(move |document_id| {
|
||||
let string: String = ref_index
|
||||
.document_attribute(ref_reader, document_id, attr)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
(string == "true") == positive
|
||||
});
|
||||
}
|
||||
|
||||
let result = builder.query(ref_reader, Some(&query), 0..command.number_results)?;
|
||||
|
||||
let mut retrieve_duration = Duration::default();
|
||||
|
||||
let number_of_documents = result.documents.len();
|
||||
for mut doc in result.documents {
|
||||
doc.highlights
|
||||
.sort_unstable_by_key(|m| (m.char_index, m.char_length));
|
||||
|
||||
let start_retrieve = Instant::now();
|
||||
let result = index.document::<Document>(&reader, Some(&fields), doc.id);
|
||||
retrieve_duration += start_retrieve.elapsed();
|
||||
|
||||
match result {
|
||||
Ok(Some(document)) => {
|
||||
println!("raw-id: {:?}", doc.id);
|
||||
for (name, text) in document.0 {
|
||||
print!("{}: ", name);
|
||||
|
||||
let attr = schema.id(&name).unwrap();
|
||||
let highlights = doc
|
||||
.highlights
|
||||
.iter()
|
||||
.filter(|m| FieldId::new(m.attribute) == attr)
|
||||
.cloned();
|
||||
let (text, highlights) =
|
||||
crop_text(&text, highlights, command.char_context);
|
||||
let areas = create_highlight_areas(&text, &highlights);
|
||||
display_highlights(&text, &areas)?;
|
||||
println!();
|
||||
}
|
||||
}
|
||||
Ok(None) => eprintln!("missing document"),
|
||||
Err(e) => eprintln!("{}", e),
|
||||
}
|
||||
|
||||
let mut matching_attributes = HashSet::new();
|
||||
for highlight in doc.highlights {
|
||||
let attr = FieldId::new(highlight.attribute);
|
||||
let name = schema.name(attr);
|
||||
matching_attributes.insert(name);
|
||||
}
|
||||
|
||||
let matching_attributes = Vec::from_iter(matching_attributes);
|
||||
println!("matching in: {:?}", matching_attributes);
|
||||
|
||||
println!();
|
||||
}
|
||||
|
||||
eprintln!(
|
||||
"whole documents fields retrieve took {:.2?}",
|
||||
retrieve_duration
|
||||
);
|
||||
eprintln!(
|
||||
"===== Found {} results in {:.2?} =====",
|
||||
number_of_documents,
|
||||
start_total.elapsed()
|
||||
);
|
||||
}
|
||||
Err(err) => {
|
||||
println!("Error: {:?}", err);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
readline.save_history("query-history.txt").unwrap();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn show_updates_command(
|
||||
command: ShowUpdatesCommand,
|
||||
database: Database,
|
||||
) -> Result<(), Box<dyn Error>> {
|
||||
let db = &database;
|
||||
let index = database
|
||||
.open_index(&command.index_uid)
|
||||
.expect("Could not find index");
|
||||
|
||||
let reader = db.update_read_txn().unwrap();
|
||||
let updates = index.all_updates_status(&reader)?;
|
||||
println!("{:#?}", updates);
|
||||
reader.abort().unwrap();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
env_logger::init();
|
||||
|
||||
let opt = Command::from_args();
|
||||
let database = Database::open_or_create(opt.path(), DatabaseOptions::default())?;
|
||||
|
||||
match opt {
|
||||
Command::Index(command) => index_command(command, database),
|
||||
Command::Search(command) => search_command(command, database),
|
||||
Command::ShowUpdates(command) => show_updates_command(command, database),
|
||||
}
|
||||
}
|
@ -1,53 +0,0 @@
|
||||
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
|
||||
use once_cell::sync::OnceCell;
|
||||
|
||||
static LEVDIST0: OnceCell<LevBuilder> = OnceCell::new();
|
||||
static LEVDIST1: OnceCell<LevBuilder> = OnceCell::new();
|
||||
static LEVDIST2: OnceCell<LevBuilder> = OnceCell::new();
|
||||
|
||||
/// Selects which kind of DFA `build_dfa_with_setting` must build.
#[derive(Copy, Clone)]
enum PrefixSetting {
    /// Build the DFA with the builder's `build_prefix_dfa`.
    Prefix,
    /// Build the DFA with the builder's `build_dfa`.
    NoPrefix,
}
|
||||
|
||||
fn build_dfa_with_setting(query: &str, setting: PrefixSetting) -> DFA {
|
||||
use PrefixSetting::{NoPrefix, Prefix};
|
||||
|
||||
match query.len() {
|
||||
0..=4 => {
|
||||
let builder = LEVDIST0.get_or_init(|| LevBuilder::new(0, true));
|
||||
match setting {
|
||||
Prefix => builder.build_prefix_dfa(query),
|
||||
NoPrefix => builder.build_dfa(query),
|
||||
}
|
||||
}
|
||||
5..=8 => {
|
||||
let builder = LEVDIST1.get_or_init(|| LevBuilder::new(1, true));
|
||||
match setting {
|
||||
Prefix => builder.build_prefix_dfa(query),
|
||||
NoPrefix => builder.build_dfa(query),
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let builder = LEVDIST2.get_or_init(|| LevBuilder::new(2, true));
|
||||
match setting {
|
||||
Prefix => builder.build_prefix_dfa(query),
|
||||
NoPrefix => builder.build_dfa(query),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_prefix_dfa(query: &str) -> DFA {
|
||||
build_dfa_with_setting(query, PrefixSetting::Prefix)
|
||||
}
|
||||
|
||||
pub fn build_dfa(query: &str) -> DFA {
|
||||
build_dfa_with_setting(query, PrefixSetting::NoPrefix)
|
||||
}
|
||||
|
||||
pub fn build_exact_dfa(query: &str) -> DFA {
|
||||
let builder = LEVDIST0.get_or_init(|| LevBuilder::new(0, true));
|
||||
builder.build_dfa(query)
|
||||
}
|
@ -1,4 +0,0 @@
|
||||
mod dfa;
|
||||
|
||||
pub use self::dfa::{build_dfa, build_prefix_dfa, build_exact_dfa};
|
||||
|
@ -1,679 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::mem;
|
||||
use std::ops::Deref;
|
||||
use std::ops::Range;
|
||||
use std::rc::Rc;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::time::Instant;
|
||||
use std::fmt;
|
||||
|
||||
use compact_arena::{SmallArena, Idx32, mk_arena};
|
||||
use log::{debug, error};
|
||||
use sdset::{Set, SetBuf, exponential_search, SetOperation, Counter, duo::OpBuilder};
|
||||
use slice_group_by::{GroupBy, GroupByMut};
|
||||
|
||||
use meilisearch_types::DocIndex;
|
||||
|
||||
use crate::criterion::{Criteria, Context, ContextMut};
|
||||
use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
|
||||
use crate::raw_document::RawDocument;
|
||||
use crate::{database::MainT, reordered_attrs::ReorderedAttrs};
|
||||
use crate::{store, Document, DocumentId, MResult, Index, RankedMap, MainReader, Error};
|
||||
use crate::query_tree::{create_query_tree, traverse_query_tree};
|
||||
use crate::query_tree::{Operation, QueryResult, QueryKind, QueryId, PostingsKey};
|
||||
use crate::query_tree::Context as QTContext;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct SortResult {
|
||||
pub documents: Vec<Document>,
|
||||
pub nb_hits: usize,
|
||||
pub exhaustive_nb_hit: bool,
|
||||
pub facets: Option<HashMap<String, HashMap<String, usize>>>,
|
||||
pub exhaustive_facets_count: Option<bool>,
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn bucket_sort<'c, FI>(
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
query: &str,
|
||||
range: Range<usize>,
|
||||
facets_docids: Option<SetBuf<DocumentId>>,
|
||||
facet_count_docids: Option<HashMap<String, HashMap<String, (&str, Cow<Set<DocumentId>>)>>>,
|
||||
filter: Option<FI>,
|
||||
criteria: Criteria<'c>,
|
||||
searchable_attrs: Option<ReorderedAttrs>,
|
||||
index: &Index,
|
||||
) -> MResult<SortResult>
|
||||
where
|
||||
FI: Fn(DocumentId) -> bool,
|
||||
{
|
||||
// We delegate the filter work to the distinct query builder,
|
||||
// specifying a distinct rule that has no effect.
|
||||
if filter.is_some() {
|
||||
let distinct = |_| None;
|
||||
let distinct_size = 1;
|
||||
return bucket_sort_with_distinct(
|
||||
reader,
|
||||
query,
|
||||
range,
|
||||
facets_docids,
|
||||
facet_count_docids,
|
||||
filter,
|
||||
distinct,
|
||||
distinct_size,
|
||||
criteria,
|
||||
searchable_attrs,
|
||||
index,
|
||||
);
|
||||
}
|
||||
|
||||
let mut result = SortResult::default();
|
||||
|
||||
let words_set = index.main.words_fst(reader)?;
|
||||
let stop_words = index.main.stop_words_fst(reader)?;
|
||||
|
||||
let context = QTContext {
|
||||
words_set,
|
||||
stop_words,
|
||||
synonyms: index.synonyms,
|
||||
postings_lists: index.postings_lists,
|
||||
prefix_postings_lists: index.prefix_postings_lists_cache,
|
||||
};
|
||||
|
||||
let (operation, mapping) = create_query_tree(reader, &context, query)?;
|
||||
debug!("operation:\n{:?}", operation);
|
||||
debug!("mapping:\n{:?}", mapping);
|
||||
|
||||
fn recurs_operation<'o>(map: &mut HashMap<QueryId, &'o QueryKind>, operation: &'o Operation) {
|
||||
match operation {
|
||||
Operation::And(ops) => ops.iter().for_each(|op| recurs_operation(map, op)),
|
||||
Operation::Or(ops) => ops.iter().for_each(|op| recurs_operation(map, op)),
|
||||
Operation::Query(query) => { map.insert(query.id, &query.kind); },
|
||||
}
|
||||
}
|
||||
|
||||
let mut queries_kinds = HashMap::new();
|
||||
recurs_operation(&mut queries_kinds, &operation);
|
||||
|
||||
let QueryResult { mut docids, queries } = traverse_query_tree(reader, &context, &operation)?;
|
||||
debug!("found {} documents", docids.len());
|
||||
debug!("number of postings {:?}", queries.len());
|
||||
|
||||
if let Some(facets_docids) = facets_docids {
|
||||
let intersection = sdset::duo::OpBuilder::new(docids.as_ref(), facets_docids.as_set())
|
||||
.intersection()
|
||||
.into_set_buf();
|
||||
docids = Cow::Owned(intersection);
|
||||
}
|
||||
|
||||
if let Some(f) = facet_count_docids {
|
||||
// hardcoded value, until approximation optimization
|
||||
result.exhaustive_facets_count = Some(true);
|
||||
result.facets = Some(facet_count(f, &docids));
|
||||
}
|
||||
|
||||
let before = Instant::now();
|
||||
mk_arena!(arena);
|
||||
let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries);
|
||||
debug!("matches cleaned in {:.02?}", before.elapsed());
|
||||
|
||||
let before_bucket_sort = Instant::now();
|
||||
|
||||
let before_raw_documents_building = Instant::now();
|
||||
let mut raw_documents = Vec::new();
|
||||
for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
|
||||
let raw_document = RawDocument::new(bare_matches, &mut arena, searchable_attrs.as_ref());
|
||||
raw_documents.push(raw_document);
|
||||
}
|
||||
debug!("creating {} candidates documents took {:.02?}",
|
||||
raw_documents.len(),
|
||||
before_raw_documents_building.elapsed(),
|
||||
);
|
||||
|
||||
let before_criterion_loop = Instant::now();
|
||||
let proximity_count = AtomicUsize::new(0);
|
||||
|
||||
let mut groups = vec![raw_documents.as_mut_slice()];
|
||||
|
||||
'criteria: for criterion in criteria.as_ref() {
|
||||
let tmp_groups = mem::replace(&mut groups, Vec::new());
|
||||
let mut documents_seen = 0;
|
||||
|
||||
for mut group in tmp_groups {
|
||||
let before_criterion_preparation = Instant::now();
|
||||
|
||||
let ctx = ContextMut {
|
||||
reader,
|
||||
postings_lists: &mut arena,
|
||||
query_mapping: &mapping,
|
||||
documents_fields_counts_store: index.documents_fields_counts,
|
||||
};
|
||||
|
||||
criterion.prepare(ctx, &mut group)?;
|
||||
debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed());
|
||||
|
||||
let ctx = Context {
|
||||
postings_lists: &arena,
|
||||
query_mapping: &mapping,
|
||||
};
|
||||
|
||||
let before_criterion_sort = Instant::now();
|
||||
group.sort_unstable_by(|a, b| criterion.evaluate(&ctx, a, b));
|
||||
debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed());
|
||||
|
||||
for group in group.binary_group_by_mut(|a, b| criterion.eq(&ctx, a, b)) {
|
||||
debug!("{:?} produced a group of size {}", criterion.name(), group.len());
|
||||
|
||||
documents_seen += group.len();
|
||||
groups.push(group);
|
||||
|
||||
// we have sort enough documents if the last document sorted is after
|
||||
// the end of the requested range, we can continue to the next criterion
|
||||
if documents_seen >= range.end {
|
||||
continue 'criteria;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
debug!("criterion loop took {:.02?}", before_criterion_loop.elapsed());
|
||||
debug!("proximity evaluation called {} times", proximity_count.load(Ordering::Relaxed));
|
||||
|
||||
let schema = index.main.schema(reader)?.ok_or(Error::SchemaMissing)?;
|
||||
let iter = raw_documents.into_iter().skip(range.start).take(range.len());
|
||||
let iter = iter.map(|rd| Document::from_raw(rd, &queries_kinds, &arena, searchable_attrs.as_ref(), &schema));
|
||||
let documents = iter.collect();
|
||||
|
||||
debug!("bucket sort took {:.02?}", before_bucket_sort.elapsed());
|
||||
|
||||
result.documents = documents;
|
||||
result.nb_hits = docids.len();
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn bucket_sort_with_distinct<'c, FI, FD>(
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
query: &str,
|
||||
range: Range<usize>,
|
||||
facets_docids: Option<SetBuf<DocumentId>>,
|
||||
facet_count_docids: Option<HashMap<String, HashMap<String, (&str, Cow<Set<DocumentId>>)>>>,
|
||||
filter: Option<FI>,
|
||||
distinct: FD,
|
||||
distinct_size: usize,
|
||||
criteria: Criteria<'c>,
|
||||
searchable_attrs: Option<ReorderedAttrs>,
|
||||
index: &Index,
|
||||
) -> MResult<SortResult>
|
||||
where
|
||||
FI: Fn(DocumentId) -> bool,
|
||||
FD: Fn(DocumentId) -> Option<u64>,
|
||||
{
|
||||
let mut result = SortResult::default();
|
||||
let mut filtered_count = 0;
|
||||
|
||||
let words_set = index.main.words_fst(reader)?;
|
||||
let stop_words = index.main.stop_words_fst(reader)?;
|
||||
|
||||
let context = QTContext {
|
||||
words_set,
|
||||
stop_words,
|
||||
synonyms: index.synonyms,
|
||||
postings_lists: index.postings_lists,
|
||||
prefix_postings_lists: index.prefix_postings_lists_cache,
|
||||
};
|
||||
|
||||
let (operation, mapping) = create_query_tree(reader, &context, query)?;
|
||||
debug!("operation:\n{:?}", operation);
|
||||
debug!("mapping:\n{:?}", mapping);
|
||||
|
||||
fn recurs_operation<'o>(map: &mut HashMap<QueryId, &'o QueryKind>, operation: &'o Operation) {
|
||||
match operation {
|
||||
Operation::And(ops) => ops.iter().for_each(|op| recurs_operation(map, op)),
|
||||
Operation::Or(ops) => ops.iter().for_each(|op| recurs_operation(map, op)),
|
||||
Operation::Query(query) => { map.insert(query.id, &query.kind); },
|
||||
}
|
||||
}
|
||||
|
||||
let mut queries_kinds = HashMap::new();
|
||||
recurs_operation(&mut queries_kinds, &operation);
|
||||
|
||||
let QueryResult { mut docids, queries } = traverse_query_tree(reader, &context, &operation)?;
|
||||
debug!("found {} documents", docids.len());
|
||||
debug!("number of postings {:?}", queries.len());
|
||||
|
||||
if let Some(facets_docids) = facets_docids {
|
||||
let intersection = OpBuilder::new(docids.as_ref(), facets_docids.as_set())
|
||||
.intersection()
|
||||
.into_set_buf();
|
||||
docids = Cow::Owned(intersection);
|
||||
}
|
||||
|
||||
if let Some(f) = facet_count_docids {
|
||||
// hardcoded value, until approximation optimization
|
||||
result.exhaustive_facets_count = Some(true);
|
||||
result.facets = Some(facet_count(f, &docids));
|
||||
}
|
||||
|
||||
let before = Instant::now();
|
||||
mk_arena!(arena);
|
||||
let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries);
|
||||
debug!("matches cleaned in {:.02?}", before.elapsed());
|
||||
|
||||
let before_raw_documents_building = Instant::now();
|
||||
let mut raw_documents = Vec::new();
|
||||
for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
|
||||
let raw_document = RawDocument::new(bare_matches, &mut arena, searchable_attrs.as_ref());
|
||||
raw_documents.push(raw_document);
|
||||
}
|
||||
debug!("creating {} candidates documents took {:.02?}",
|
||||
raw_documents.len(),
|
||||
before_raw_documents_building.elapsed(),
|
||||
);
|
||||
|
||||
let mut groups = vec![raw_documents.as_mut_slice()];
|
||||
let mut key_cache = HashMap::new();
|
||||
|
||||
let mut filter_map = HashMap::new();
|
||||
// these two variables informs on the current distinct map and
|
||||
// on the raw offset of the start of the group where the
|
||||
// range.start bound is located according to the distinct function
|
||||
let mut distinct_map = DistinctMap::new(distinct_size);
|
||||
let mut distinct_raw_offset = 0;
|
||||
|
||||
'criteria: for criterion in criteria.as_ref() {
|
||||
let tmp_groups = mem::replace(&mut groups, Vec::new());
|
||||
let mut buf_distinct = BufferedDistinctMap::new(&mut distinct_map);
|
||||
let mut documents_seen = 0;
|
||||
|
||||
for mut group in tmp_groups {
|
||||
// if this group does not overlap with the requested range,
|
||||
// push it without sorting and splitting it
|
||||
if documents_seen + group.len() < distinct_raw_offset {
|
||||
documents_seen += group.len();
|
||||
groups.push(group);
|
||||
continue;
|
||||
}
|
||||
|
||||
let ctx = ContextMut {
|
||||
reader,
|
||||
postings_lists: &mut arena,
|
||||
query_mapping: &mapping,
|
||||
documents_fields_counts_store: index.documents_fields_counts,
|
||||
};
|
||||
|
||||
let before_criterion_preparation = Instant::now();
|
||||
criterion.prepare(ctx, &mut group)?;
|
||||
debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed());
|
||||
|
||||
let ctx = Context {
|
||||
postings_lists: &arena,
|
||||
query_mapping: &mapping,
|
||||
};
|
||||
|
||||
let before_criterion_sort = Instant::now();
|
||||
group.sort_unstable_by(|a, b| criterion.evaluate(&ctx, a, b));
|
||||
debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed());
|
||||
|
||||
for group in group.binary_group_by_mut(|a, b| criterion.eq(&ctx, a, b)) {
|
||||
// we must compute the real distinguished len of this sub-group
|
||||
for document in group.iter() {
|
||||
let filter_accepted = match &filter {
|
||||
Some(filter) => {
|
||||
let entry = filter_map.entry(document.id);
|
||||
*entry.or_insert_with(|| {
|
||||
let accepted = (filter)(document.id);
|
||||
// we only want to count it out the first time we see it
|
||||
if !accepted {
|
||||
filtered_count += 1;
|
||||
}
|
||||
accepted
|
||||
})
|
||||
}
|
||||
None => true,
|
||||
};
|
||||
|
||||
if filter_accepted {
|
||||
let entry = key_cache.entry(document.id);
|
||||
let mut seen = true;
|
||||
let key = entry.or_insert_with(|| {
|
||||
seen = false;
|
||||
(distinct)(document.id).map(Rc::new)
|
||||
});
|
||||
|
||||
let distinct = match key.clone() {
|
||||
Some(key) => buf_distinct.register(key),
|
||||
None => buf_distinct.register_without_key(),
|
||||
};
|
||||
|
||||
// we only want to count the document if it is the first time we see it and
|
||||
// if it wasn't accepted by distinct
|
||||
if !seen && !distinct {
|
||||
filtered_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// the requested range end is reached: stop computing distinct
|
||||
if buf_distinct.len() >= range.end {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
documents_seen += group.len();
|
||||
groups.push(group);
|
||||
|
||||
// if this sub-group does not overlap with the requested range
|
||||
// we must update the distinct map and its start index
|
||||
if buf_distinct.len() < range.start {
|
||||
buf_distinct.transfert_to_internal();
|
||||
distinct_raw_offset = documents_seen;
|
||||
}
|
||||
|
||||
// we have sort enough documents if the last document sorted is after
|
||||
// the end of the requested range, we can continue to the next criterion
|
||||
if buf_distinct.len() >= range.end {
|
||||
continue 'criteria;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// once we classified the documents related to the current
|
||||
// automatons we save that as the next valid result
|
||||
let mut seen = BufferedDistinctMap::new(&mut distinct_map);
|
||||
let schema = index.main.schema(reader)?.ok_or(Error::SchemaMissing)?;
|
||||
|
||||
let mut documents = Vec::with_capacity(range.len());
|
||||
for raw_document in raw_documents.into_iter().skip(distinct_raw_offset) {
|
||||
let filter_accepted = match &filter {
|
||||
Some(_) => filter_map.remove(&raw_document.id).unwrap_or_else(|| {
|
||||
error!("error during filtering: expected value for document id {}", &raw_document.id.0);
|
||||
Default::default()
|
||||
}),
|
||||
None => true,
|
||||
};
|
||||
|
||||
if filter_accepted {
|
||||
let key = key_cache.remove(&raw_document.id).unwrap_or_else(|| {
|
||||
error!("error during distinct: expected value for document id {}", &raw_document.id.0);
|
||||
Default::default()
|
||||
});
|
||||
let distinct_accepted = match key {
|
||||
Some(key) => seen.register(key),
|
||||
None => seen.register_without_key(),
|
||||
};
|
||||
|
||||
if distinct_accepted && seen.len() > range.start {
|
||||
documents.push(Document::from_raw(raw_document, &queries_kinds, &arena, searchable_attrs.as_ref(), &schema));
|
||||
if documents.len() == range.len() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
result.documents = documents;
|
||||
result.nb_hits = docids.len() - filtered_count;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn cleanup_bare_matches<'tag, 'txn>(
|
||||
arena: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
docids: &Set<DocumentId>,
|
||||
queries: HashMap<PostingsKey, Cow<'txn, Set<DocIndex>>>,
|
||||
) -> Vec<BareMatch<'tag>>
|
||||
{
|
||||
let docidslen = docids.len() as f32;
|
||||
let mut bare_matches = Vec::new();
|
||||
|
||||
for (PostingsKey { query, input, distance, is_exact }, matches) in queries {
|
||||
let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches));
|
||||
let pllen = postings_list_view.len() as f32;
|
||||
|
||||
if docidslen / pllen >= 0.8 {
|
||||
let mut offset = 0;
|
||||
for matches in postings_list_view.linear_group_by_key(|m| m.document_id) {
|
||||
let document_id = matches[0].document_id;
|
||||
if docids.contains(&document_id) {
|
||||
let range = postings_list_view.range(offset, matches.len());
|
||||
let posting_list_index = arena.add(range);
|
||||
|
||||
let bare_match = BareMatch {
|
||||
document_id,
|
||||
query_index: query.id,
|
||||
distance,
|
||||
is_exact,
|
||||
postings_list: posting_list_index,
|
||||
};
|
||||
|
||||
bare_matches.push(bare_match);
|
||||
}
|
||||
|
||||
offset += matches.len();
|
||||
}
|
||||
|
||||
} else {
|
||||
let mut offset = 0;
|
||||
for id in docids.as_slice() {
|
||||
let di = DocIndex { document_id: *id, ..DocIndex::default() };
|
||||
let pos = exponential_search(&postings_list_view[offset..], &di).unwrap_or_else(|x| x);
|
||||
|
||||
offset += pos;
|
||||
|
||||
let group = postings_list_view[offset..]
|
||||
.linear_group_by_key(|m| m.document_id)
|
||||
.next()
|
||||
.filter(|matches| matches[0].document_id == *id);
|
||||
|
||||
if let Some(matches) = group {
|
||||
let range = postings_list_view.range(offset, matches.len());
|
||||
let posting_list_index = arena.add(range);
|
||||
|
||||
let bare_match = BareMatch {
|
||||
document_id: *id,
|
||||
query_index: query.id,
|
||||
distance,
|
||||
is_exact,
|
||||
postings_list: posting_list_index,
|
||||
};
|
||||
|
||||
bare_matches.push(bare_match);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let before_raw_documents_presort = Instant::now();
|
||||
bare_matches.sort_unstable_by_key(|sm| sm.document_id);
|
||||
debug!("sort by documents ids took {:.02?}", before_raw_documents_presort.elapsed());
|
||||
|
||||
bare_matches
|
||||
}
|
||||
|
||||
pub struct BareMatch<'tag> {
|
||||
pub document_id: DocumentId,
|
||||
pub query_index: usize,
|
||||
pub distance: u8,
|
||||
pub is_exact: bool,
|
||||
pub postings_list: Idx32<'tag>,
|
||||
}
|
||||
|
||||
impl fmt::Debug for BareMatch<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("BareMatch")
|
||||
.field("document_id", &self.document_id)
|
||||
.field("query_index", &self.query_index)
|
||||
.field("distance", &self.distance)
|
||||
.field("is_exact", &self.is_exact)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// A single matching position of a query inside a document.
///
/// The derived ordering compares the fields in declaration order, so the
/// field order below must not be changed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct SimpleMatch {
    /// Identifier of the query that matched.
    pub query_index: usize,
    /// Distance copied from the originating bare match.
    pub distance: u8,
    /// Attribute in which the word was found.
    pub attribute: u16,
    /// Index of the word inside the attribute.
    pub word_index: u16,
    /// Exactness flag copied from the originating bare match.
    pub is_exact: bool,
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum PostingsListView<'txn> {
|
||||
Original {
|
||||
input: Rc<[u8]>,
|
||||
postings_list: Rc<Cow<'txn, Set<DocIndex>>>,
|
||||
offset: usize,
|
||||
len: usize,
|
||||
},
|
||||
Rewritten {
|
||||
input: Rc<[u8]>,
|
||||
postings_list: SetBuf<DocIndex>,
|
||||
},
|
||||
}
|
||||
|
||||
impl fmt::Debug for PostingsListView<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("PostingsListView")
|
||||
.field("input", &std::str::from_utf8(&self.input()).unwrap())
|
||||
.field("postings_list", &self.as_ref())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'txn> PostingsListView<'txn> {
|
||||
pub fn original(input: Rc<[u8]>, postings_list: Rc<Cow<'txn, Set<DocIndex>>>) -> PostingsListView<'txn> {
|
||||
let len = postings_list.len();
|
||||
PostingsListView::Original { input, postings_list, offset: 0, len }
|
||||
}
|
||||
|
||||
pub fn rewritten(input: Rc<[u8]>, postings_list: SetBuf<DocIndex>) -> PostingsListView<'txn> {
|
||||
PostingsListView::Rewritten { input, postings_list }
|
||||
}
|
||||
|
||||
pub fn rewrite_with(&mut self, postings_list: SetBuf<DocIndex>) {
|
||||
let input = match self {
|
||||
PostingsListView::Original { input, .. } => input.clone(),
|
||||
PostingsListView::Rewritten { input, .. } => input.clone(),
|
||||
};
|
||||
*self = PostingsListView::rewritten(input, postings_list);
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
match self {
|
||||
PostingsListView::Original { len, .. } => *len,
|
||||
PostingsListView::Rewritten { postings_list, .. } => postings_list.len(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn input(&self) -> &[u8] {
|
||||
match self {
|
||||
PostingsListView::Original { ref input, .. } => input,
|
||||
PostingsListView::Rewritten { ref input, .. } => input,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn range(&self, range_offset: usize, range_len: usize) -> PostingsListView<'txn> {
|
||||
match self {
|
||||
PostingsListView::Original { input, postings_list, offset, len } => {
|
||||
assert!(range_offset + range_len <= *len);
|
||||
PostingsListView::Original {
|
||||
input: input.clone(),
|
||||
postings_list: postings_list.clone(),
|
||||
offset: offset + range_offset,
|
||||
len: range_len,
|
||||
}
|
||||
},
|
||||
PostingsListView::Rewritten { .. } => {
|
||||
panic!("Cannot create a range on a rewritten postings list view");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Exposes the viewed entries as a `Set`, going through the `Deref` impl.
impl AsRef<Set<DocIndex>> for PostingsListView<'_> {
    fn as_ref(&self) -> &Set<DocIndex> {
        &**self
    }
}
|
||||
|
||||
impl Deref for PostingsListView<'_> {
|
||||
type Target = Set<DocIndex>;
|
||||
|
||||
fn deref(&self) -> &Set<DocIndex> {
|
||||
match *self {
|
||||
PostingsListView::Original { ref postings_list, offset, len, .. } => {
|
||||
Set::new_unchecked(&postings_list[offset..offset + len])
|
||||
},
|
||||
PostingsListView::Rewritten { ref postings_list, .. } => postings_list,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// sorts documents ids according to user defined ranking rules.
|
||||
pub fn placeholder_document_sort(
|
||||
document_ids: &mut [DocumentId],
|
||||
index: &store::Index,
|
||||
reader: &MainReader,
|
||||
ranked_map: &RankedMap
|
||||
) -> MResult<()> {
|
||||
use crate::settings::RankingRule;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
enum SortOrder {
|
||||
Asc,
|
||||
Desc,
|
||||
}
|
||||
|
||||
if let Some(ranking_rules) = index.main.ranking_rules(reader)? {
|
||||
let schema = index.main.schema(reader)?
|
||||
.ok_or(Error::SchemaMissing)?;
|
||||
|
||||
// Select custom rules from ranking rules, and map them to custom rules
|
||||
// containing a field_id
|
||||
let ranking_rules = ranking_rules.iter().filter_map(|r|
|
||||
match r {
|
||||
RankingRule::Asc(name) => schema.id(name).map(|f| (f, SortOrder::Asc)),
|
||||
RankingRule::Desc(name) => schema.id(name).map(|f| (f, SortOrder::Desc)),
|
||||
_ => None,
|
||||
}).collect::<Vec<_>>();
|
||||
|
||||
document_ids.sort_unstable_by(|a, b| {
|
||||
for (field_id, order) in &ranking_rules {
|
||||
let a_value = ranked_map.get(*a, *field_id);
|
||||
let b_value = ranked_map.get(*b, *field_id);
|
||||
let (a, b) = match order {
|
||||
SortOrder::Asc => (a_value, b_value),
|
||||
SortOrder::Desc => (b_value, a_value),
|
||||
};
|
||||
match a.cmp(&b) {
|
||||
Ordering::Equal => continue,
|
||||
ordering => return ordering,
|
||||
}
|
||||
}
|
||||
Ordering::Equal
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// For each entry in facet_docids, calculates the number of documents in the intersection with candidate_docids.
|
||||
pub fn facet_count(
|
||||
facet_docids: HashMap<String, HashMap<String, (&str, Cow<Set<DocumentId>>)>>,
|
||||
candidate_docids: &Set<DocumentId>,
|
||||
) -> HashMap<String, HashMap<String, usize>> {
|
||||
let mut facets_counts = HashMap::with_capacity(facet_docids.len());
|
||||
for (key, doc_map) in facet_docids {
|
||||
let mut count_map = HashMap::with_capacity(doc_map.len());
|
||||
for (_, (value, docids)) in doc_map {
|
||||
let mut counter = Counter::new();
|
||||
let op = OpBuilder::new(docids.as_ref(), candidate_docids).intersection();
|
||||
SetOperation::<DocumentId>::extend_collection(op, &mut counter);
|
||||
count_map.insert(value.to_string(), counter.0);
|
||||
}
|
||||
facets_counts.insert(key, count_map);
|
||||
}
|
||||
facets_counts
|
||||
}
|
@ -1,37 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use slice_group_by::GroupBy;
|
||||
use crate::{RawDocument, MResult};
|
||||
use crate::bucket_sort::SimpleMatch;
|
||||
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
|
||||
|
||||
/// Criterion ranking documents by the sum of the attributes of their matches.
pub struct Attribute;
|
||||
|
||||
impl Criterion for Attribute {
|
||||
fn name(&self) -> &str { "attribute" }
|
||||
|
||||
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_mapping);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
#[inline]
|
||||
fn sum_of_attribute(matches: &[SimpleMatch]) -> usize {
|
||||
let mut sum_of_attribute = 0;
|
||||
for group in matches.linear_group_by_key(|bm| bm.query_index) {
|
||||
sum_of_attribute += group[0].attribute as usize;
|
||||
}
|
||||
sum_of_attribute
|
||||
}
|
||||
|
||||
let lhs = sum_of_attribute(&lhs.processed_matches);
|
||||
let rhs = sum_of_attribute(&rhs.processed_matches);
|
||||
|
||||
lhs.cmp(&rhs)
|
||||
}
|
||||
}
|
@ -1,16 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use crate::RawDocument;
|
||||
use super::{Criterion, Context};
|
||||
|
||||
/// Criterion ordering documents by their id.
pub struct DocumentId;
|
||||
|
||||
impl Criterion for DocumentId {
|
||||
fn name(&self) -> &str { "stable document id" }
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
let lhs = &lhs.id;
|
||||
let rhs = &rhs.id;
|
||||
|
||||
lhs.cmp(rhs)
|
||||
}
|
||||
}
|
@ -1,78 +0,0 @@
|
||||
use std::cmp::{Ordering, Reverse};
|
||||
use std::collections::hash_map::{HashMap, Entry};
|
||||
use meilisearch_schema::IndexedPos;
|
||||
use slice_group_by::GroupBy;
|
||||
use crate::{RawDocument, MResult};
|
||||
use crate::bucket_sort::BareMatch;
|
||||
use super::{Criterion, Context, ContextMut};
|
||||
|
||||
/// Criterion favouring documents with a matching one-word field, then
/// documents with the most exactly matched query words.
pub struct Exactness;
|
||||
|
||||
impl Criterion for Exactness {
|
||||
fn name(&self) -> &str { "exactness" }
|
||||
|
||||
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
let store = ctx.documents_fields_counts_store;
|
||||
let reader = ctx.reader;
|
||||
|
||||
'documents: for doc in documents {
|
||||
doc.bare_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
|
||||
|
||||
// mark the document if we find a "one word field" that matches
|
||||
let mut fields_counts = HashMap::new();
|
||||
for group in doc.bare_matches.linear_group_by_key(|bm| bm.query_index) {
|
||||
for group in group.linear_group_by_key(|bm| bm.is_exact) {
|
||||
if !group[0].is_exact { break }
|
||||
|
||||
for bm in group {
|
||||
for di in ctx.postings_lists[bm.postings_list].as_ref() {
|
||||
|
||||
let attr = IndexedPos(di.attribute);
|
||||
let count = match fields_counts.entry(attr) {
|
||||
Entry::Occupied(entry) => *entry.get(),
|
||||
Entry::Vacant(entry) => {
|
||||
let count = store.document_field_count(reader, doc.id, attr)?;
|
||||
*entry.insert(count)
|
||||
},
|
||||
};
|
||||
|
||||
if count == Some(1) {
|
||||
doc.contains_one_word_field = true;
|
||||
continue 'documents
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
#[inline]
|
||||
fn sum_exact_query_words(matches: &[BareMatch]) -> usize {
|
||||
let mut sum_exact_query_words = 0;
|
||||
|
||||
for group in matches.linear_group_by_key(|bm| bm.query_index) {
|
||||
sum_exact_query_words += group[0].is_exact as usize;
|
||||
}
|
||||
|
||||
sum_exact_query_words
|
||||
}
|
||||
|
||||
// does it contains a "one word field"
|
||||
lhs.contains_one_word_field.cmp(&rhs.contains_one_word_field).reverse()
|
||||
// if not, with document contains the more exact words
|
||||
.then_with(|| {
|
||||
let lhs = sum_exact_query_words(&lhs.bare_matches);
|
||||
let rhs = sum_exact_query_words(&rhs.bare_matches);
|
||||
lhs.cmp(&rhs).reverse()
|
||||
})
|
||||
}
|
||||
}
|
@ -1,292 +0,0 @@
|
||||
use std::cmp::{self, Ordering};
|
||||
use std::collections::HashMap;
|
||||
use std::ops::Range;
|
||||
|
||||
use compact_arena::SmallArena;
|
||||
use sdset::SetBuf;
|
||||
use slice_group_by::GroupBy;
|
||||
|
||||
use crate::bucket_sort::{SimpleMatch, PostingsListView};
|
||||
use crate::database::MainT;
|
||||
use crate::query_tree::QueryId;
|
||||
use crate::{store, RawDocument, MResult};
|
||||
|
||||
mod typo;
|
||||
mod words;
|
||||
mod proximity;
|
||||
mod attribute;
|
||||
mod words_position;
|
||||
mod exactness;
|
||||
mod document_id;
|
||||
mod sort_by_attr;
|
||||
|
||||
pub use self::typo::Typo;
|
||||
pub use self::words::Words;
|
||||
pub use self::proximity::Proximity;
|
||||
pub use self::attribute::Attribute;
|
||||
pub use self::words_position::WordsPosition;
|
||||
pub use self::exactness::Exactness;
|
||||
pub use self::document_id::DocumentId;
|
||||
pub use self::sort_by_attr::SortByAttr;
|
||||
|
||||
pub trait Criterion {
|
||||
fn name(&self) -> &str;
|
||||
|
||||
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
_ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
|
||||
_documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate<'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
ctx: &Context<'p, 'tag, 'txn, 'q>,
|
||||
lhs: &RawDocument<'r, 'tag>,
|
||||
rhs: &RawDocument<'r, 'tag>,
|
||||
) -> Ordering;
|
||||
|
||||
#[inline]
|
||||
fn eq<'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
ctx: &Context<'p, 'tag, 'txn, 'q>,
|
||||
lhs: &RawDocument<'r, 'tag>,
|
||||
rhs: &RawDocument<'r, 'tag>,
|
||||
) -> bool
|
||||
{
|
||||
self.evaluate(ctx, lhs, rhs) == Ordering::Equal
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ContextMut<'h, 'p, 'tag, 'txn, 'q> {
|
||||
pub reader: &'h heed::RoTxn<'h, MainT>,
|
||||
pub postings_lists: &'p mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
pub query_mapping: &'q HashMap<QueryId, Range<usize>>,
|
||||
pub documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||
}
|
||||
|
||||
pub struct Context<'p, 'tag, 'txn, 'q> {
|
||||
pub postings_lists: &'p SmallArena<'tag, PostingsListView<'txn>>,
|
||||
pub query_mapping: &'q HashMap<QueryId, Range<usize>>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct CriteriaBuilder<'a> {
|
||||
inner: Vec<Box<dyn Criterion + 'a>>,
|
||||
}
|
||||
|
||||
impl<'a> CriteriaBuilder<'a> {
|
||||
pub fn new() -> CriteriaBuilder<'a> {
|
||||
CriteriaBuilder { inner: Vec::new() }
|
||||
}
|
||||
|
||||
pub fn with_capacity(capacity: usize) -> CriteriaBuilder<'a> {
|
||||
CriteriaBuilder {
|
||||
inner: Vec::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn reserve(&mut self, additional: usize) {
|
||||
self.inner.reserve(additional)
|
||||
}
|
||||
|
||||
#[allow(clippy::should_implement_trait)]
|
||||
pub fn add<C: 'a>(mut self, criterion: C) -> CriteriaBuilder<'a>
|
||||
where
|
||||
C: Criterion,
|
||||
{
|
||||
self.push(criterion);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn push<C: 'a>(&mut self, criterion: C)
|
||||
where
|
||||
C: Criterion,
|
||||
{
|
||||
self.inner.push(Box::new(criterion));
|
||||
}
|
||||
|
||||
pub fn build(self) -> Criteria<'a> {
|
||||
Criteria { inner: self.inner }
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Criteria<'a> {
|
||||
inner: Vec<Box<dyn Criterion + 'a>>,
|
||||
}
|
||||
|
||||
impl<'a> Default for Criteria<'a> {
|
||||
fn default() -> Self {
|
||||
CriteriaBuilder::with_capacity(7)
|
||||
.add(Typo)
|
||||
.add(Words)
|
||||
.add(Proximity)
|
||||
.add(Attribute)
|
||||
.add(WordsPosition)
|
||||
.add(Exactness)
|
||||
.add(DocumentId)
|
||||
.build()
|
||||
}
|
||||
}
|
||||
|
||||
/// Gives access to the criteria as a slice, in application order.
impl<'a> AsRef<[Box<dyn Criterion + 'a>]> for Criteria<'a> {
    fn as_ref(&self) -> &[Box<dyn Criterion + 'a>] {
        self.inner.as_slice()
    }
}
|
||||
|
||||
fn prepare_query_distances<'a, 'tag, 'txn>(
|
||||
documents: &mut [RawDocument<'a, 'tag>],
|
||||
query_mapping: &HashMap<QueryId, Range<usize>>,
|
||||
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
|
||||
) {
|
||||
for document in documents {
|
||||
if !document.processed_distances.is_empty() { continue }
|
||||
|
||||
let mut processed = Vec::new();
|
||||
for m in document.bare_matches.iter() {
|
||||
if postings_lists[m.postings_list].is_empty() { continue }
|
||||
|
||||
let range = query_mapping[&(m.query_index as usize)].clone();
|
||||
let new_len = cmp::max(range.end as usize, processed.len());
|
||||
processed.resize(new_len, None);
|
||||
|
||||
for index in range {
|
||||
let index = index as usize;
|
||||
processed[index] = match processed[index] {
|
||||
Some(distance) if distance > m.distance => Some(m.distance),
|
||||
Some(distance) => Some(distance),
|
||||
None => Some(m.distance),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
document.processed_distances = processed;
|
||||
}
|
||||
}
|
||||
|
||||
fn prepare_bare_matches<'a, 'tag, 'txn>(
|
||||
documents: &mut [RawDocument<'a, 'tag>],
|
||||
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
query_mapping: &HashMap<QueryId, Range<usize>>,
|
||||
) {
|
||||
for document in documents {
|
||||
if !document.processed_matches.is_empty() { continue }
|
||||
|
||||
let mut processed = Vec::new();
|
||||
for m in document.bare_matches.iter() {
|
||||
let postings_list = &postings_lists[m.postings_list];
|
||||
processed.reserve(postings_list.len());
|
||||
for di in postings_list.as_ref() {
|
||||
let simple_match = SimpleMatch {
|
||||
query_index: m.query_index,
|
||||
distance: m.distance,
|
||||
attribute: di.attribute,
|
||||
word_index: di.word_index,
|
||||
is_exact: m.is_exact,
|
||||
};
|
||||
processed.push(simple_match);
|
||||
}
|
||||
}
|
||||
|
||||
let processed = multiword_rewrite_matches(&mut processed, query_mapping);
|
||||
document.processed_matches = processed.into_vec();
|
||||
}
|
||||
}
|
||||
|
||||
fn multiword_rewrite_matches(
|
||||
matches: &mut [SimpleMatch],
|
||||
query_mapping: &HashMap<QueryId, Range<usize>>,
|
||||
) -> SetBuf<SimpleMatch>
|
||||
{
|
||||
matches.sort_unstable_by_key(|m| (m.attribute, m.word_index));
|
||||
|
||||
let mut padded_matches = Vec::with_capacity(matches.len());
|
||||
|
||||
// let before_padding = Instant::now();
|
||||
// for each attribute of each document
|
||||
for same_document_attribute in matches.linear_group_by_key(|m| m.attribute) {
|
||||
// padding will only be applied
|
||||
// to word indices in the same attribute
|
||||
let mut padding = 0;
|
||||
let mut iter = same_document_attribute.linear_group_by_key(|m| m.word_index);
|
||||
|
||||
// for each match at the same position
|
||||
// in this document attribute
|
||||
while let Some(same_word_index) = iter.next() {
|
||||
// find the biggest padding
|
||||
let mut biggest = 0;
|
||||
for match_ in same_word_index {
|
||||
let mut replacement = query_mapping[&(match_.query_index as usize)].clone();
|
||||
let replacement_len = replacement.len();
|
||||
let nexts = iter.remainder().linear_group_by_key(|m| m.word_index);
|
||||
|
||||
if let Some(query_index) = replacement.next() {
|
||||
let word_index = match_.word_index + padding as u16;
|
||||
let match_ = SimpleMatch { query_index, word_index, ..*match_ };
|
||||
padded_matches.push(match_);
|
||||
}
|
||||
|
||||
let mut found = false;
|
||||
|
||||
// look ahead and if there already is a match
|
||||
// corresponding to this padding word, abort the padding
|
||||
'padding: for (x, next_group) in nexts.enumerate() {
|
||||
for (i, query_index) in replacement.clone().enumerate().skip(x) {
|
||||
let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
|
||||
let padmatch = SimpleMatch { query_index, word_index, ..*match_ };
|
||||
|
||||
for nmatch_ in next_group {
|
||||
let mut rep = query_mapping[&(nmatch_.query_index as usize)].clone();
|
||||
let query_index = rep.next().unwrap();
|
||||
if query_index == padmatch.query_index {
|
||||
if !found {
|
||||
// if we find a corresponding padding for the
|
||||
// first time we must push preceding paddings
|
||||
for (i, query_index) in replacement.clone().enumerate().take(i) {
|
||||
let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
|
||||
let match_ = SimpleMatch { query_index, word_index, ..*match_ };
|
||||
padded_matches.push(match_);
|
||||
biggest = biggest.max(i + 1);
|
||||
}
|
||||
}
|
||||
|
||||
padded_matches.push(padmatch);
|
||||
found = true;
|
||||
continue 'padding;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if we do not find a corresponding padding in the
|
||||
// next groups so stop here and pad what was found
|
||||
break;
|
||||
}
|
||||
|
||||
if !found {
|
||||
// if no padding was found in the following matches
|
||||
// we must insert the entire padding
|
||||
for (i, query_index) in replacement.enumerate() {
|
||||
let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
|
||||
let match_ = SimpleMatch { query_index, word_index, ..*match_ };
|
||||
padded_matches.push(match_);
|
||||
}
|
||||
|
||||
biggest = biggest.max(replacement_len - 1);
|
||||
}
|
||||
}
|
||||
|
||||
padding += biggest;
|
||||
}
|
||||
}
|
||||
|
||||
// debug!("padding matches took {:.02?}", before_padding.elapsed());
|
||||
|
||||
// With this check we can see that the loop above takes something
|
||||
// like 43% of the search time even when no rewrite is needed.
|
||||
// assert_eq!(before_matches, padded_matches);
|
||||
|
||||
SetBuf::from_dirty(padded_matches)
|
||||
}
|
@ -1,68 +0,0 @@
|
||||
use std::cmp::{self, Ordering};
|
||||
use slice_group_by::GroupBy;
|
||||
use crate::bucket_sort::{SimpleMatch};
|
||||
use crate::{RawDocument, MResult};
|
||||
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
|
||||
|
||||
const MAX_DISTANCE: u16 = 8;
|
||||
|
||||
pub struct Proximity;
|
||||
|
||||
impl Criterion for Proximity {
|
||||
fn name(&self) -> &str { "proximity" }
|
||||
|
||||
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_mapping);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
fn index_proximity(lhs: u16, rhs: u16) -> u16 {
|
||||
if lhs < rhs {
|
||||
cmp::min(rhs - lhs, MAX_DISTANCE)
|
||||
} else {
|
||||
cmp::min(lhs - rhs, MAX_DISTANCE) + 1
|
||||
}
|
||||
}
|
||||
|
||||
fn attribute_proximity(lhs: SimpleMatch, rhs: SimpleMatch) -> u16 {
|
||||
if lhs.attribute != rhs.attribute { MAX_DISTANCE }
|
||||
else { index_proximity(lhs.word_index, rhs.word_index) }
|
||||
}
|
||||
|
||||
fn min_proximity(lhs: &[SimpleMatch], rhs: &[SimpleMatch]) -> u16 {
|
||||
let mut min_prox = u16::max_value();
|
||||
for a in lhs {
|
||||
for b in rhs {
|
||||
let prox = attribute_proximity(*a, *b);
|
||||
min_prox = cmp::min(min_prox, prox);
|
||||
}
|
||||
}
|
||||
min_prox
|
||||
}
|
||||
|
||||
fn matches_proximity(matches: &[SimpleMatch],) -> u16 {
|
||||
let mut proximity = 0;
|
||||
let mut iter = matches.linear_group_by_key(|m| m.query_index);
|
||||
|
||||
// iterate over groups by windows of size 2
|
||||
let mut last = iter.next();
|
||||
while let (Some(lhs), Some(rhs)) = (last, iter.next()) {
|
||||
proximity += min_proximity(lhs, rhs);
|
||||
last = Some(rhs);
|
||||
}
|
||||
|
||||
proximity
|
||||
}
|
||||
|
||||
let lhs = matches_proximity(&lhs.processed_matches);
|
||||
let rhs = matches_proximity(&rhs.processed_matches);
|
||||
|
||||
lhs.cmp(&rhs)
|
||||
}
|
||||
}
|
@ -1,129 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
use meilisearch_schema::{Schema, FieldId};
|
||||
use crate::{RankedMap, RawDocument};
|
||||
use super::{Criterion, Context};
|
||||
|
||||
/// An helper struct that permit to sort documents by
|
||||
/// some of their stored attributes.
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// If a document cannot be deserialized it will be considered [`None`][].
|
||||
///
|
||||
/// Deserialized documents are compared like `Some(doc0).cmp(&Some(doc1))`,
|
||||
/// so you must check the [`Ord`] of `Option` implementation.
|
||||
///
|
||||
/// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
|
||||
/// [`Ord`]: https://doc.rust-lang.org/std/option/enum.Option.html#impl-Ord
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```ignore
|
||||
/// use serde_derive::Deserialize;
|
||||
/// use meilisearch::rank::criterion::*;
|
||||
///
|
||||
/// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?;
|
||||
///
|
||||
/// let builder = CriteriaBuilder::with_capacity(8)
|
||||
/// .add(Typo)
|
||||
/// .add(Words)
|
||||
/// .add(Proximity)
|
||||
/// .add(Attribute)
|
||||
/// .add(WordsPosition)
|
||||
/// .add(Exactness)
|
||||
/// .add(custom_ranking)
|
||||
/// .add(DocumentId);
|
||||
///
|
||||
/// let criterion = builder.build();
|
||||
///
|
||||
/// ```
|
||||
pub struct SortByAttr<'a> {
|
||||
ranked_map: &'a RankedMap,
|
||||
field_id: FieldId,
|
||||
reversed: bool,
|
||||
}
|
||||
|
||||
impl<'a> SortByAttr<'a> {
|
||||
pub fn lower_is_better(
|
||||
ranked_map: &'a RankedMap,
|
||||
schema: &Schema,
|
||||
attr_name: &str,
|
||||
) -> Result<SortByAttr<'a>, SortByAttrError> {
|
||||
SortByAttr::new(ranked_map, schema, attr_name, false)
|
||||
}
|
||||
|
||||
pub fn higher_is_better(
|
||||
ranked_map: &'a RankedMap,
|
||||
schema: &Schema,
|
||||
attr_name: &str,
|
||||
) -> Result<SortByAttr<'a>, SortByAttrError> {
|
||||
SortByAttr::new(ranked_map, schema, attr_name, true)
|
||||
}
|
||||
|
||||
fn new(
|
||||
ranked_map: &'a RankedMap,
|
||||
schema: &Schema,
|
||||
attr_name: &str,
|
||||
reversed: bool,
|
||||
) -> Result<SortByAttr<'a>, SortByAttrError> {
|
||||
let field_id = match schema.id(attr_name) {
|
||||
Some(field_id) => field_id,
|
||||
None => return Err(SortByAttrError::AttributeNotFound),
|
||||
};
|
||||
|
||||
if !schema.is_ranked(field_id) {
|
||||
return Err(SortByAttrError::AttributeNotRegisteredForRanking);
|
||||
}
|
||||
|
||||
Ok(SortByAttr {
|
||||
ranked_map,
|
||||
field_id,
|
||||
reversed,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Criterion for SortByAttr<'_> {
|
||||
fn name(&self) -> &str {
|
||||
"sort by attribute"
|
||||
}
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
let lhs = self.ranked_map.get(lhs.id, self.field_id);
|
||||
let rhs = self.ranked_map.get(rhs.id, self.field_id);
|
||||
|
||||
match (lhs, rhs) {
|
||||
(Some(lhs), Some(rhs)) => {
|
||||
let order = lhs.cmp(&rhs);
|
||||
if self.reversed {
|
||||
order.reverse()
|
||||
} else {
|
||||
order
|
||||
}
|
||||
}
|
||||
(None, Some(_)) => Ordering::Greater,
|
||||
(Some(_), None) => Ordering::Less,
|
||||
(None, None) => Ordering::Equal,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Reasons why a `SortByAttr` criterion cannot be built.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SortByAttrError {
    /// The attribute name is unknown to the schema.
    AttributeNotFound,
    /// The attribute exists but is not marked as ranked in the schema.
    AttributeNotRegisteredForRanking,
}

impl fmt::Display for SortByAttrError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match self {
            SortByAttrError::AttributeNotFound => "attribute not found in the schema",
            SortByAttrError::AttributeNotRegisteredForRanking => {
                "attribute not registered for ranking"
            }
        };
        f.write_str(message)
    }
}

impl Error for SortByAttrError {}
|
@ -1,56 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use crate::{RawDocument, MResult};
|
||||
use super::{Criterion, Context, ContextMut, prepare_query_distances};
|
||||
|
||||
pub struct Typo;
|
||||
|
||||
impl Criterion for Typo {
|
||||
fn name(&self) -> &str { "typo" }
|
||||
|
||||
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
prepare_query_distances(documents, ctx.query_mapping, ctx.postings_lists);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
// This function is a wrong logarithmic 10 function.
|
||||
// It is safe to panic on input number higher than 3,
|
||||
// the number of typos is never bigger than that.
|
||||
#[inline]
|
||||
#[allow(clippy::approx_constant)]
|
||||
fn custom_log10(n: u8) -> f32 {
|
||||
match n {
|
||||
0 => 0.0, // log(1)
|
||||
1 => 0.30102, // log(2)
|
||||
2 => 0.47712, // log(3)
|
||||
3 => 0.60205, // log(4)
|
||||
_ => panic!("invalid number"),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn compute_typos(distances: &[Option<u8>]) -> usize {
|
||||
let mut number_words: usize = 0;
|
||||
let mut sum_typos = 0.0;
|
||||
|
||||
for distance in distances {
|
||||
if let Some(distance) = distance {
|
||||
sum_typos += custom_log10(*distance);
|
||||
number_words += 1;
|
||||
}
|
||||
}
|
||||
|
||||
(number_words as f32 / (sum_typos + 1.0) * 1000.0) as usize
|
||||
}
|
||||
|
||||
let lhs = compute_typos(&lhs.processed_distances);
|
||||
let rhs = compute_typos(&rhs.processed_distances);
|
||||
|
||||
lhs.cmp(&rhs).reverse()
|
||||
}
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use crate::{RawDocument, MResult};
|
||||
use super::{Criterion, Context, ContextMut, prepare_query_distances};
|
||||
|
||||
pub struct Words;
|
||||
|
||||
impl Criterion for Words {
|
||||
fn name(&self) -> &str { "words" }
|
||||
|
||||
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
prepare_query_distances(documents, ctx.query_mapping, ctx.postings_lists);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
#[inline]
|
||||
fn number_of_query_words(distances: &[Option<u8>]) -> usize {
|
||||
distances.iter().cloned().filter(Option::is_some).count()
|
||||
}
|
||||
|
||||
let lhs = number_of_query_words(&lhs.processed_distances);
|
||||
let rhs = number_of_query_words(&rhs.processed_distances);
|
||||
|
||||
lhs.cmp(&rhs).reverse()
|
||||
}
|
||||
}
|
@ -1,37 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use slice_group_by::GroupBy;
|
||||
use crate::bucket_sort::SimpleMatch;
|
||||
use crate::{RawDocument, MResult};
|
||||
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
|
||||
|
||||
pub struct WordsPosition;
|
||||
|
||||
impl Criterion for WordsPosition {
|
||||
fn name(&self) -> &str { "words position" }
|
||||
|
||||
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
|
||||
&self,
|
||||
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) -> MResult<()>
|
||||
{
|
||||
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_mapping);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
#[inline]
|
||||
fn sum_words_position(matches: &[SimpleMatch]) -> usize {
|
||||
let mut sum_words_position = 0;
|
||||
for group in matches.linear_group_by_key(|bm| bm.query_index) {
|
||||
sum_words_position += group[0].word_index as usize;
|
||||
}
|
||||
sum_words_position
|
||||
}
|
||||
|
||||
let lhs = sum_words_position(&lhs.processed_matches);
|
||||
let rhs = sum_words_position(&rhs.processed_matches);
|
||||
|
||||
lhs.cmp(&rhs)
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -1,103 +0,0 @@
|
||||
use hashbrown::HashMap;
|
||||
use std::hash::Hash;
|
||||
|
||||
/// Counts how many times each key has been accepted, allowing at most
/// `limit` acceptances per key.
pub struct DistinctMap<K> {
    inner: HashMap<K, usize>,
    limit: usize,
    len: usize,
}

impl<K: Hash + Eq> DistinctMap<K> {
    /// Creates an empty map accepting each key at most `limit` times.
    pub fn new(limit: usize) -> Self {
        DistinctMap { inner: HashMap::new(), limit, len: 0 }
    }

    /// Total number of registrations transferred into this map so far.
    pub fn len(&self) -> usize {
        self.len
    }
}

/// A staging layer over a [`DistinctMap`]: registrations are buffered here
/// and only reach the underlying map on `transfert_to_internal`.
pub struct BufferedDistinctMap<'a, K> {
    internal: &'a mut DistinctMap<K>,
    inner: HashMap<K, usize>,
    len: usize,
}

impl<'a, K: Hash + Eq> BufferedDistinctMap<'a, K> {
    /// Wraps `internal` with an empty buffer.
    pub fn new(internal: &'a mut DistinctMap<K>) -> BufferedDistinctMap<'a, K> {
        BufferedDistinctMap { internal, inner: HashMap::new(), len: 0 }
    }

    /// Tries to register `key`.
    ///
    /// Returns `true` and counts the registration when the key has been
    /// seen (buffered plus already transferred) fewer times than the limit;
    /// returns `false` otherwise.
    pub fn register(&mut self, key: K) -> bool {
        let committed = self.internal.inner.get(&key).copied().unwrap_or(0);
        let pending = self.inner.entry(key).or_insert(0);

        if committed + *pending >= self.internal.limit {
            return false;
        }

        *pending += 1;
        self.len += 1;
        true
    }

    /// Counts one registration that is not bound to any key; always accepted.
    pub fn register_without_key(&mut self) -> bool {
        self.len += 1;
        true
    }

    /// Moves every buffered count into the underlying map and resets the buffer.
    pub fn transfert_to_internal(&mut self) {
        for (key, count) in self.inner.drain() {
            *self.internal.inner.entry(key).or_insert(0) += count;
        }

        self.internal.len += self.len;
        self.len = 0;
    }

    /// Registrations in the underlying map plus the ones still buffered.
    pub fn len(&self) -> usize {
        self.internal.len() + self.len
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that each key is accepted at most `limit` times and that the
    /// accepted registrations reach the underlying map on transfer.
    #[test]
    fn easy_distinct_map() {
        let mut map = DistinctMap::new(2);
        let mut buffered = BufferedDistinctMap::new(&mut map);

        [1, 1, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6].iter().for_each(|x| {
            buffered.register(x);
        });
        buffered.transfert_to_internal();
        assert_eq!(map.len(), 8);

        let mut map = DistinctMap::new(2);
        let mut buffered = BufferedDistinctMap::new(&mut map);

        let expectations = [
            (1, true),
            (1, true),
            (1, false),
            (1, false),
            (2, true),
            (3, true),
            (2, true),
            (2, false),
        ];
        for &(key, accepted) in expectations.iter() {
            assert_eq!(buffered.register(key), accepted);
        }

        buffered.transfert_to_internal();
        assert_eq!(map.len(), 5);
    }
}
|
@ -1,224 +0,0 @@
|
||||
use crate::serde::{DeserializerError, SerializerError};
|
||||
use serde_json::Error as SerdeJsonError;
|
||||
use pest::error::Error as PestError;
|
||||
use crate::filters::Rule;
|
||||
use std::{error, fmt, io};
|
||||
|
||||
pub use bincode::Error as BincodeError;
|
||||
pub use fst::Error as FstError;
|
||||
pub use heed::Error as HeedError;
|
||||
pub use pest::error as pest_error;
|
||||
|
||||
use meilisearch_error::{ErrorCode, Code};
|
||||
|
||||
pub type MResult<T> = Result<T, Error>;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
Bincode(bincode::Error),
|
||||
Deserializer(DeserializerError),
|
||||
FacetError(FacetError),
|
||||
FilterParseError(PestError<Rule>),
|
||||
Fst(fst::Error),
|
||||
Heed(heed::Error),
|
||||
IndexAlreadyExists,
|
||||
Io(io::Error),
|
||||
MaxFieldsLimitExceeded,
|
||||
MissingDocumentId,
|
||||
MissingPrimaryKey,
|
||||
Schema(meilisearch_schema::Error),
|
||||
SchemaMissing,
|
||||
SerdeJson(SerdeJsonError),
|
||||
Serializer(SerializerError),
|
||||
VersionMismatch(String),
|
||||
WordIndexMissing,
|
||||
}
|
||||
|
||||
impl ErrorCode for Error {
|
||||
fn error_code(&self) -> Code {
|
||||
use Error::*;
|
||||
|
||||
match self {
|
||||
FacetError(_) => Code::Facet,
|
||||
FilterParseError(_) => Code::Filter,
|
||||
IndexAlreadyExists => Code::IndexAlreadyExists,
|
||||
MissingPrimaryKey => Code::MissingPrimaryKey,
|
||||
MissingDocumentId => Code::MissingDocumentId,
|
||||
MaxFieldsLimitExceeded => Code::MaxFieldsLimitExceeded,
|
||||
Schema(s) => s.error_code(),
|
||||
WordIndexMissing
|
||||
| SchemaMissing => Code::InvalidState,
|
||||
Heed(_)
|
||||
| Fst(_)
|
||||
| SerdeJson(_)
|
||||
| Bincode(_)
|
||||
| Serializer(_)
|
||||
| Deserializer(_)
|
||||
| VersionMismatch(_)
|
||||
| Io(_) => Code::Internal,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<io::Error> for Error {
|
||||
fn from(error: io::Error) -> Error {
|
||||
Error::Io(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<PestError<Rule>> for Error {
|
||||
fn from(error: PestError<Rule>) -> Error {
|
||||
Error::FilterParseError(error.renamed_rules(|r| {
|
||||
let s = match r {
|
||||
Rule::or => "OR",
|
||||
Rule::and => "AND",
|
||||
Rule::not => "NOT",
|
||||
Rule::string => "string",
|
||||
Rule::word => "word",
|
||||
Rule::greater => "field > value",
|
||||
Rule::less => "field < value",
|
||||
Rule::eq => "field = value",
|
||||
Rule::leq => "field <= value",
|
||||
Rule::geq => "field >= value",
|
||||
Rule::key => "key",
|
||||
_ => "other",
|
||||
};
|
||||
s.to_string()
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<FacetError> for Error {
|
||||
fn from(error: FacetError) -> Error {
|
||||
Error::FacetError(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<meilisearch_schema::Error> for Error {
|
||||
fn from(error: meilisearch_schema::Error) -> Error {
|
||||
Error::Schema(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<HeedError> for Error {
|
||||
fn from(error: HeedError) -> Error {
|
||||
Error::Heed(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<FstError> for Error {
|
||||
fn from(error: FstError) -> Error {
|
||||
Error::Fst(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SerdeJsonError> for Error {
|
||||
fn from(error: SerdeJsonError) -> Error {
|
||||
Error::SerdeJson(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<BincodeError> for Error {
|
||||
fn from(error: BincodeError) -> Error {
|
||||
Error::Bincode(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SerializerError> for Error {
|
||||
fn from(error: SerializerError) -> Error {
|
||||
match error {
|
||||
SerializerError::DocumentIdNotFound => Error::MissingDocumentId,
|
||||
e => Error::Serializer(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DeserializerError> for Error {
|
||||
fn from(error: DeserializerError) -> Error {
|
||||
Error::Deserializer(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
use self::Error::*;
|
||||
match self {
|
||||
Bincode(e) => write!(f, "bincode error; {}", e),
|
||||
Deserializer(e) => write!(f, "deserializer error; {}", e),
|
||||
FacetError(e) => write!(f, "error processing facet filter: {}", e),
|
||||
FilterParseError(e) => write!(f, "error parsing filter; {}", e),
|
||||
Fst(e) => write!(f, "fst error; {}", e),
|
||||
Heed(e) => write!(f, "heed error; {}", e),
|
||||
IndexAlreadyExists => write!(f, "index already exists"),
|
||||
Io(e) => write!(f, "{}", e),
|
||||
MaxFieldsLimitExceeded => write!(f, "maximum number of fields in a document exceeded"),
|
||||
MissingDocumentId => write!(f, "document id is missing"),
|
||||
MissingPrimaryKey => write!(f, "schema cannot be built without a primary key"),
|
||||
Schema(e) => write!(f, "schema error; {}", e),
|
||||
SchemaMissing => write!(f, "this index does not have a schema"),
|
||||
SerdeJson(e) => write!(f, "serde json error; {}", e),
|
||||
Serializer(e) => write!(f, "serializer error; {}", e),
|
||||
VersionMismatch(version) => write!(f, "Cannot open database, expected MeiliSearch engine version: {}, current engine version: {}.{}.{}",
|
||||
version,
|
||||
env!("CARGO_PKG_VERSION_MAJOR"),
|
||||
env!("CARGO_PKG_VERSION_MINOR"),
|
||||
env!("CARGO_PKG_VERSION_PATCH")),
|
||||
WordIndexMissing => write!(f, "this index does not have a word index"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for Error {}
|
||||
|
||||
struct FilterParseError(PestError<Rule>);
|
||||
|
||||
impl fmt::Display for FilterParseError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
use crate::pest_error::LineColLocation::*;
|
||||
|
||||
let (line, column) = match self.0.line_col {
|
||||
Span((line, _), (column, _)) => (line, column),
|
||||
Pos((line, column)) => (line, column),
|
||||
};
|
||||
write!(f, "parsing error on line {} at column {}: {}", line, column, self.0.variant.message())
|
||||
}
|
||||
}
|
||||
|
||||
/// Errors raised while parsing facet filters or building facet maps.
#[derive(Debug)]
pub enum FacetError {
    /// An empty array was found in a facet filter.
    EmptyArray,
    /// The facet filter could not be parsed; carries the parser message.
    ParsingError(String),
    /// A token of an unexpected kind was found.
    UnexpectedToken { expected: &'static [&'static str], found: String },
    /// The facet is not of the form `facetName:facetValue`.
    InvalidFormat(String),
    /// The attribute is unknown.
    AttributeNotFound(String),
    /// The attribute is not set as a faceted attribute; `expected` lists
    /// the available facet attributes.
    AttributeNotSet { expected: Vec<String>, found: String },
    /// The document attribute has a type that cannot be faceted.
    InvalidDocumentAttribute(String),
    /// No attributes for faceting are set.
    NoAttributesForFaceting,
}

impl FacetError {
    /// Builds an `UnexpectedToken` error from anything printable.
    pub fn unexpected_token(expected: &'static [&'static str], found: impl ToString) -> FacetError {
        FacetError::UnexpectedToken { expected, found: found.to_string() }
    }

    /// Builds an `AttributeNotSet` error from anything printable.
    pub fn attribute_not_set(expected: Vec<String>, found: impl ToString) -> FacetError {
        FacetError::AttributeNotSet { expected, found: found.to_string() }
    }
}

impl fmt::Display for FacetError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use FacetError::*;

        match self {
            EmptyArray => write!(f, "empty array in facet filter is unspecified behavior"),
            ParsingError(msg) => write!(f, "parsing error: {}", msg),
            // the separator needs surrounding spaces, otherwise the
            // alternatives are glued together ("ArrayorString")
            UnexpectedToken { expected, found } => write!(f, "unexpected token {}, expected {}", found, expected.join(" or ")),
            InvalidFormat(found) => write!(f, "invalid facet: {}, facets should be \"facetName:facetValue\"", found),
            AttributeNotFound(attr) => write!(f, "unknown {:?} attribute", attr),
            AttributeNotSet { found, expected } => write!(f, "`{}` is not set as a faceted attribute. available facet attributes: {}", found, expected.join(", ")),
            InvalidDocumentAttribute(attr) => write!(f, "invalid document attribute {}, accepted types: String and [String]", attr),
            NoAttributesForFaceting => write!(f, "impossible to perform faceted search, no attributes for faceting are set"),
        }
    }
}
|
@ -1,357 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::hash::Hash;
|
||||
use std::ops::Deref;
|
||||
|
||||
use cow_utils::CowUtils;
|
||||
use either::Either;
|
||||
use heed::types::{Str, OwnedType};
|
||||
use indexmap::IndexMap;
|
||||
use serde_json::Value;
|
||||
|
||||
use meilisearch_schema::{FieldId, Schema};
|
||||
use meilisearch_types::DocumentId;
|
||||
|
||||
use crate::database::MainT;
|
||||
use crate::error::{FacetError, MResult};
|
||||
use crate::store::BEU16;
|
||||
|
||||
/// Data structure used to represent a boolean expression in the form of nested arrays.
|
||||
/// Values in the outer array are and-ed together, values in the inner arrays are or-ed together.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct FacetFilter(Vec<Either<Vec<FacetKey>, FacetKey>>);
|
||||
|
||||
impl Deref for FacetFilter {
|
||||
type Target = Vec<Either<Vec<FacetKey>, FacetKey>>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl FacetFilter {
|
||||
pub fn from_str(
|
||||
s: &str,
|
||||
schema: &Schema,
|
||||
attributes_for_faceting: &[FieldId],
|
||||
) -> MResult<FacetFilter> {
|
||||
if attributes_for_faceting.is_empty() {
|
||||
return Err(FacetError::NoAttributesForFaceting.into());
|
||||
}
|
||||
let parsed = serde_json::from_str::<Value>(s).map_err(|e| FacetError::ParsingError(e.to_string()))?;
|
||||
let mut filter = Vec::new();
|
||||
match parsed {
|
||||
Value::Array(and_exprs) => {
|
||||
if and_exprs.is_empty() {
|
||||
return Err(FacetError::EmptyArray.into());
|
||||
}
|
||||
for expr in and_exprs {
|
||||
match expr {
|
||||
Value::String(s) => {
|
||||
let key = FacetKey::from_str( &s, schema, attributes_for_faceting)?;
|
||||
filter.push(Either::Right(key));
|
||||
}
|
||||
Value::Array(or_exprs) => {
|
||||
if or_exprs.is_empty() {
|
||||
return Err(FacetError::EmptyArray.into());
|
||||
}
|
||||
let mut inner = Vec::new();
|
||||
for expr in or_exprs {
|
||||
match expr {
|
||||
Value::String(s) => {
|
||||
let key = FacetKey::from_str( &s, schema, attributes_for_faceting)?;
|
||||
inner.push(key);
|
||||
}
|
||||
bad_value => return Err(FacetError::unexpected_token(&["String"], bad_value).into()),
|
||||
}
|
||||
}
|
||||
filter.push(Either::Left(inner));
|
||||
}
|
||||
bad_value => return Err(FacetError::unexpected_token(&["Array", "String"], bad_value).into()),
|
||||
}
|
||||
}
|
||||
Ok(Self(filter))
|
||||
}
|
||||
bad_value => Err(FacetError::unexpected_token(&["Array"], bad_value).into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq, Hash)]
|
||||
#[repr(C)]
|
||||
pub struct FacetKey(FieldId, String);
|
||||
|
||||
impl FacetKey {
|
||||
pub fn new(field_id: FieldId, value: String) -> Self {
|
||||
let value = match value.cow_to_lowercase() {
|
||||
Cow::Borrowed(_) => value,
|
||||
Cow::Owned(s) => s,
|
||||
};
|
||||
Self(field_id, value)
|
||||
}
|
||||
|
||||
pub fn key(&self) -> FieldId {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub fn value(&self) -> &str {
|
||||
&self.1
|
||||
}
|
||||
|
||||
// TODO improve parser
|
||||
fn from_str(
|
||||
s: &str,
|
||||
schema: &Schema,
|
||||
attributes_for_faceting: &[FieldId],
|
||||
) -> Result<Self, FacetError> {
|
||||
let mut split = s.splitn(2, ':');
|
||||
let key = split
|
||||
.next()
|
||||
.ok_or_else(|| FacetError::InvalidFormat(s.to_string()))?
|
||||
.trim();
|
||||
let field_id = schema
|
||||
.id(key)
|
||||
.ok_or_else(|| FacetError::AttributeNotFound(key.to_string()))?;
|
||||
|
||||
if !attributes_for_faceting.contains(&field_id) {
|
||||
return Err(FacetError::attribute_not_set(
|
||||
attributes_for_faceting
|
||||
.iter()
|
||||
.filter_map(|&id| schema.name(id))
|
||||
.map(str::to_string)
|
||||
.collect::<Vec<_>>(),
|
||||
key))
|
||||
}
|
||||
let value = split
|
||||
.next()
|
||||
.ok_or_else(|| FacetError::InvalidFormat(s.to_string()))?
|
||||
.trim();
|
||||
// unquoting the string if need be:
|
||||
let mut indices = value.char_indices();
|
||||
let value = match (indices.next(), indices.last()) {
|
||||
(Some((s, '\'')), Some((e, '\''))) |
|
||||
(Some((s, '\"')), Some((e, '\"'))) => value[s + 1..e].to_string(),
|
||||
_ => value.to_string(),
|
||||
};
|
||||
Ok(Self::new(field_id, value))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FacetKey {
|
||||
type EItem = FacetKey;
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let mut buffer = Vec::with_capacity(2 + item.1.len());
|
||||
let id = BEU16::new(item.key().into());
|
||||
let id_bytes = OwnedType::bytes_encode(&id)?;
|
||||
let value_bytes = Str::bytes_encode(item.value())?;
|
||||
buffer.extend_from_slice(id_bytes.as_ref());
|
||||
buffer.extend_from_slice(value_bytes.as_ref());
|
||||
Some(Cow::Owned(buffer))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FacetKey {
|
||||
type DItem = FacetKey;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (id_bytes, value_bytes) = bytes.split_at(2);
|
||||
let id = OwnedType::<BEU16>::bytes_decode(id_bytes)?;
|
||||
let id = id.get().into();
|
||||
let string = Str::bytes_decode(&value_bytes)?;
|
||||
Some(FacetKey(id, string.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_to_facet_map(
|
||||
facet_map: &mut HashMap<FacetKey, (String, Vec<DocumentId>)>,
|
||||
field_id: FieldId,
|
||||
value: Value,
|
||||
document_id: DocumentId,
|
||||
) -> Result<(), FacetError> {
|
||||
let value = match value {
|
||||
Value::String(s) => s,
|
||||
// ignore null
|
||||
Value::Null => return Ok(()),
|
||||
value => return Err(FacetError::InvalidDocumentAttribute(value.to_string())),
|
||||
};
|
||||
let key = FacetKey::new(field_id, value.clone());
|
||||
facet_map.entry(key).or_insert_with(|| (value, Vec::new())).1.push(document_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn facet_map_from_docids(
|
||||
rtxn: &heed::RoTxn<MainT>,
|
||||
index: &crate::Index,
|
||||
document_ids: &[DocumentId],
|
||||
attributes_for_facetting: &[FieldId],
|
||||
) -> MResult<HashMap<FacetKey, (String, Vec<DocumentId>)>> {
|
||||
// A hashmap that ascociate a facet key to a pair containing the original facet attribute
|
||||
// string with it's case preserved, and a list of document ids for that facet attribute.
|
||||
let mut facet_map: HashMap<FacetKey, (String, Vec<DocumentId>)> = HashMap::new();
|
||||
for document_id in document_ids {
|
||||
for result in index
|
||||
.documents_fields
|
||||
.document_fields(rtxn, *document_id)?
|
||||
{
|
||||
let (field_id, bytes) = result?;
|
||||
if attributes_for_facetting.contains(&field_id) {
|
||||
match serde_json::from_slice(bytes)? {
|
||||
Value::Array(values) => {
|
||||
for v in values {
|
||||
add_to_facet_map(&mut facet_map, field_id, v, *document_id)?;
|
||||
}
|
||||
}
|
||||
v => add_to_facet_map(&mut facet_map, field_id, v, *document_id)?,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(facet_map)
|
||||
}
|
||||
|
||||
pub fn facet_map_from_docs(
|
||||
schema: &Schema,
|
||||
documents: &HashMap<DocumentId, IndexMap<String, Value>>,
|
||||
attributes_for_facetting: &[FieldId],
|
||||
) -> MResult<HashMap<FacetKey, (String, Vec<DocumentId>)>> {
|
||||
let mut facet_map = HashMap::new();
|
||||
let attributes_for_facetting = attributes_for_facetting
|
||||
.iter()
|
||||
.filter_map(|&id| schema.name(id).map(|name| (id, name)))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for (id, document) in documents {
|
||||
for (field_id, name) in &attributes_for_facetting {
|
||||
if let Some(value) = document.get(*name) {
|
||||
match value {
|
||||
Value::Array(values) => {
|
||||
for v in values {
|
||||
add_to_facet_map(&mut facet_map, *field_id, v.clone(), *id)?;
|
||||
}
|
||||
}
|
||||
v => add_to_facet_map(&mut facet_map, *field_id, v.clone(), *id)?,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(facet_map)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use meilisearch_schema::Schema;

    /// Checks which `name:value` strings `FacetKey::from_str` accepts and
    /// what key each of them is expected to produce.
    #[test]
    fn test_facet_key() {
        let mut schema = Schema::default();
        let id = schema.insert_with_position("hello").unwrap().0;
        let facet_list = [schema.id("hello").unwrap()];

        // (input, value expected in the parsed key)
        let accepted = [
            ("hello:12", "12"),
            ("hello:\"foo bar\"", "foo bar"),
            ("hello:'foo bar'", "foo bar"),
            // weird case
            ("hello:blabla:machin", "blabla:machin"),
            ("hello:\"\"", ""),
            ("hello:'", "'"),
            ("hello:''", ""),
        ];
        for &(input, value) in accepted.iter() {
            assert_eq!(
                FacetKey::from_str(input, &schema, &facet_list).unwrap(),
                FacetKey::new(id, value.to_string())
            );
        }

        // missing separator, then unknown attribute
        for &input in ["hello", "toto:12"].iter() {
            assert!(FacetKey::from_str(input, &schema, &facet_list).is_err());
        }
    }

    /// Checks the array shapes accepted by `FacetFilter::from_str` and the
    /// inputs it must reject.
    #[test]
    fn test_parse_facet_array() {
        use either::Either::{Left, Right};

        let mut schema = Schema::default();
        let _id = schema.insert_with_position("hello").unwrap();
        let facet_list = [schema.id("hello").unwrap()];

        // Small shorthands to keep the expectations readable.
        let parse = |input: &str| FacetFilter::from_str(input, &schema, &facet_list);
        let key = |value: &str| FacetKey(FieldId(0), value.to_string());

        assert_eq!(
            parse("[[\"hello:12\"]]").unwrap(),
            FacetFilter(vec![Left(vec![key("12")])])
        );
        assert_eq!(
            parse("[\"hello:12\"]").unwrap(),
            FacetFilter(vec![Right(key("12"))])
        );
        assert_eq!(
            parse("[\"hello:12\", \"hello:13\"]").unwrap(),
            FacetFilter(vec![Right(key("12")), Right(key("13"))])
        );
        assert_eq!(
            parse("[[\"hello:12\", \"hello:13\"]]").unwrap(),
            FacetFilter(vec![Left(vec![key("12"), key("13")])])
        );
        assert_eq!(
            parse("[[\"hello:12\", \"hello:13\"], \"hello:14\"]").unwrap(),
            FacetFilter(vec![Left(vec![key("12"), key("13")]), Right(key("14"))])
        );

        let rejected = [
            // invalid array depths
            "[[[\"hello:12\", \"hello:13\"], \"hello:14\"]]",
            "[[[\"hello:12\", \"hello:13\"]], \"hello:14\"]]",
            "\"hello:14\"",
            // unexisting key
            "[\"foo:12\"]",
            // invalid facet key
            "[\"foo=12\"]",
            "[\"foo12\"]",
            "[\"\"]",
            // empty array error
            "[]",
            "[\"hello:12\", []]",
        ];
        for &input in rejected.iter() {
            assert!(parse(input).is_err());
        }
    }
}
|
@ -1,276 +0,0 @@
|
||||
use std::str::FromStr;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::{store::Index, DocumentId, MainT};
|
||||
use heed::RoTxn;
|
||||
use meilisearch_schema::{FieldId, Schema};
|
||||
use pest::error::{Error as PestError, ErrorVariant};
|
||||
use pest::iterators::Pair;
|
||||
use serde_json::{Value, Number};
|
||||
use super::parser::Rule;
|
||||
|
||||
/// Comparison operator of a single filter condition.
#[derive(Debug, PartialEq)]
enum ConditionType {
    /// `key > value`
    Greater,
    /// `key < value`
    Less,
    /// `key = value`
    Equal,
    /// `key <= value`
    LessEqual,
    /// `key >= value`
    GreaterEqual,
    /// `key != value`
    NotEqual,
}
|
||||
|
||||
/// We need to infer type when the filter is constructed
|
||||
/// and match every possible types it can be parsed into.
|
||||
#[derive(Debug)]
|
||||
struct ConditionValue<'a> {
|
||||
string: &'a str,
|
||||
boolean: Option<bool>,
|
||||
number: Option<Number>
|
||||
}
|
||||
|
||||
impl<'a> ConditionValue<'a> {
|
||||
pub fn new(value: &Pair<'a, Rule>) -> Self {
|
||||
match value.as_rule() {
|
||||
Rule::string | Rule::word => {
|
||||
let string = value.as_str();
|
||||
let boolean = match value.as_str() {
|
||||
"true" => Some(true),
|
||||
"false" => Some(false),
|
||||
_ => None,
|
||||
};
|
||||
let number = Number::from_str(value.as_str()).ok();
|
||||
ConditionValue { string, boolean, number }
|
||||
},
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> &str {
|
||||
self.string
|
||||
}
|
||||
|
||||
pub fn as_number(&self) -> Option<&Number> {
|
||||
self.number.as_ref()
|
||||
}
|
||||
|
||||
pub fn as_bool(&self) -> Option<bool> {
|
||||
self.boolean
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Condition<'a> {
|
||||
field: FieldId,
|
||||
condition: ConditionType,
|
||||
value: ConditionValue<'a>
|
||||
}
|
||||
|
||||
fn get_field_value<'a>(schema: &Schema, pair: Pair<'a, Rule>) -> Result<(FieldId, ConditionValue<'a>), Error> {
|
||||
let mut items = pair.into_inner();
|
||||
// lexing ensures that we at least have a key
|
||||
let key = items.next().unwrap();
|
||||
let field = schema
|
||||
.id(key.as_str())
|
||||
.ok_or_else(|| PestError::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: format!(
|
||||
"attribute `{}` not found, available attributes are: {}",
|
||||
key.as_str(),
|
||||
schema.names().collect::<Vec<_>>().join(", ")
|
||||
),
|
||||
},
|
||||
key.as_span()))?;
|
||||
let value = ConditionValue::new(&items.next().unwrap());
|
||||
Ok((field, value))
|
||||
}
|
||||
|
||||
// undefined behavior with big numbers
|
||||
fn compare_numbers(lhs: &Number, rhs: &Number) -> Option<Ordering> {
|
||||
match (lhs.as_i64(), lhs.as_u64(), lhs.as_f64(),
|
||||
rhs.as_i64(), rhs.as_u64(), rhs.as_f64()) {
|
||||
// i64 u64 f64 i64 u64 f64
|
||||
(Some(lhs), _, _, Some(rhs), _, _) => lhs.partial_cmp(&rhs),
|
||||
(_, Some(lhs), _, _, Some(rhs), _) => lhs.partial_cmp(&rhs),
|
||||
(_, _, Some(lhs), _, _, Some(rhs)) => lhs.partial_cmp(&rhs),
|
||||
(_, _, _, _, _, _) => None,
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Condition<'a> {
|
||||
pub fn less(
|
||||
item: Pair<'a, Rule>,
|
||||
schema: &'a Schema,
|
||||
) -> Result<Self, Error> {
|
||||
let (field, value) = get_field_value(schema, item)?;
|
||||
let condition = ConditionType::Less;
|
||||
Ok(Self { field, condition, value })
|
||||
}
|
||||
|
||||
pub fn greater(
|
||||
item: Pair<'a, Rule>,
|
||||
schema: &'a Schema,
|
||||
) -> Result<Self, Error> {
|
||||
let (field, value) = get_field_value(schema, item)?;
|
||||
let condition = ConditionType::Greater;
|
||||
Ok(Self { field, condition, value })
|
||||
}
|
||||
|
||||
pub fn neq(
|
||||
item: Pair<'a, Rule>,
|
||||
schema: &'a Schema,
|
||||
) -> Result<Self, Error> {
|
||||
let (field, value) = get_field_value(schema, item)?;
|
||||
let condition = ConditionType::NotEqual;
|
||||
Ok(Self { field, condition, value })
|
||||
}
|
||||
|
||||
pub fn geq(
|
||||
item: Pair<'a, Rule>,
|
||||
schema: &'a Schema,
|
||||
) -> Result<Self, Error> {
|
||||
let (field, value) = get_field_value(schema, item)?;
|
||||
let condition = ConditionType::GreaterEqual;
|
||||
Ok(Self { field, condition, value })
|
||||
}
|
||||
|
||||
pub fn leq(
|
||||
item: Pair<'a, Rule>,
|
||||
schema: &'a Schema,
|
||||
) -> Result<Self, Error> {
|
||||
let (field, value) = get_field_value(schema, item)?;
|
||||
let condition = ConditionType::LessEqual;
|
||||
Ok(Self { field, condition, value })
|
||||
}
|
||||
|
||||
pub fn eq(
|
||||
item: Pair<'a, Rule>,
|
||||
schema: &'a Schema,
|
||||
) -> Result<Self, Error> {
|
||||
let (field, value) = get_field_value(schema, item)?;
|
||||
let condition = ConditionType::Equal;
|
||||
Ok(Self { field, condition, value })
|
||||
}
|
||||
|
||||
pub fn test(
|
||||
&self,
|
||||
reader: &RoTxn<MainT>,
|
||||
index: &Index,
|
||||
document_id: DocumentId,
|
||||
) -> Result<bool, Error> {
|
||||
match index.document_attribute::<Value>(reader, document_id, self.field)? {
|
||||
Some(Value::Array(values)) => Ok(values.iter().any(|v| self.match_value(Some(v)))),
|
||||
other => Ok(self.match_value(other.as_ref())),
|
||||
}
|
||||
}
|
||||
|
||||
fn match_value(&self, value: Option<&Value>) -> bool {
|
||||
match value {
|
||||
Some(Value::String(s)) => {
|
||||
let value = self.value.as_str();
|
||||
match self.condition {
|
||||
ConditionType::Equal => unicase::eq(value, &s),
|
||||
ConditionType::NotEqual => !unicase::eq(value, &s),
|
||||
_ => false
|
||||
}
|
||||
},
|
||||
Some(Value::Number(n)) => {
|
||||
if let Some(value) = self.value.as_number() {
|
||||
if let Some(ord) = compare_numbers(&n, value) {
|
||||
let res = match self.condition {
|
||||
ConditionType::Equal => ord == Ordering::Equal,
|
||||
ConditionType::NotEqual => ord != Ordering::Equal,
|
||||
ConditionType::GreaterEqual => ord != Ordering::Less,
|
||||
ConditionType::LessEqual => ord != Ordering::Greater,
|
||||
ConditionType::Greater => ord == Ordering::Greater,
|
||||
ConditionType::Less => ord == Ordering::Less,
|
||||
};
|
||||
return res
|
||||
}
|
||||
}
|
||||
false
|
||||
},
|
||||
Some(Value::Bool(b)) => {
|
||||
if let Some(value) = self.value.as_bool() {
|
||||
let res = match self.condition {
|
||||
ConditionType::Equal => *b == value,
|
||||
ConditionType::NotEqual => *b != value,
|
||||
_ => false
|
||||
};
|
||||
return res
|
||||
}
|
||||
false
|
||||
},
|
||||
// if field is not supported (or not found), all values are different from it,
|
||||
// so != should always return true in this case.
|
||||
_ => self.condition == ConditionType::NotEqual,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use serde_json::Number;
    use std::cmp::Ordering;

    /// Asserts that `smaller < greater` and, symmetrically, `greater > smaller`.
    fn assert_ordered(smaller: &Number, greater: &Number) {
        assert_eq!(Some(Ordering::Less), compare_numbers(smaller, greater));
        assert_eq!(Some(Ordering::Greater), compare_numbers(greater, smaller));
    }

    #[test]
    fn test_number_comp() {
        // test both u64
        assert_ordered(&Number::from(1u64), &Number::from(2u64));
        assert_eq!(
            Some(Ordering::Equal),
            compare_numbers(&Number::from(1u64), &Number::from(1u64))
        );

        // test both i64
        assert_ordered(&Number::from(1i64), &Number::from(2i64));
        assert_eq!(
            Some(Ordering::Equal),
            compare_numbers(&Number::from(1i64), &Number::from(1i64))
        );

        // test both f64
        let one = Number::from_f64(1f64).unwrap();
        let two = Number::from_f64(2f64).unwrap();
        assert_ordered(&one, &two);
        assert_eq!(
            Some(Ordering::Equal),
            compare_numbers(&one, &Number::from_f64(1f64).unwrap())
        );

        // test one u64 and one f64
        assert_ordered(&one, &Number::from(2u64));

        // equality
        let one_u64 = Number::from(1u64);
        assert_eq!(Some(Ordering::Equal), compare_numbers(&one, &one_u64));
        assert_eq!(Some(Ordering::Equal), compare_numbers(&one_u64, &one));

        // float is neg
        let minus_one = Number::from_f64(-1f64).unwrap();
        assert_ordered(&minus_one, &one_u64);

        // float is too big
        let huge = Number::from_f64(std::f64::MAX).unwrap();
        assert_ordered(&one_u64, &huge);

        // misc
        assert_ordered(&Number::from(std::u64::MAX), &huge);
    }
}
|
@ -1,127 +0,0 @@
|
||||
mod parser;
|
||||
mod condition;
|
||||
|
||||
pub(crate) use parser::Rule;
|
||||
|
||||
use std::ops::Not;
|
||||
|
||||
use condition::Condition;
|
||||
use crate::error::Error;
|
||||
use crate::{DocumentId, MainT, store::Index};
|
||||
use heed::RoTxn;
|
||||
use meilisearch_schema::Schema;
|
||||
use parser::{PREC_CLIMBER, FilterParser};
|
||||
use pest::iterators::{Pair, Pairs};
|
||||
use pest::Parser;
|
||||
|
||||
type FilterResult<'a> = Result<Filter<'a>, Error>;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Filter<'a> {
|
||||
Condition(Condition<'a>),
|
||||
Or(Box<Self>, Box<Self>),
|
||||
And(Box<Self>, Box<Self>),
|
||||
Not(Box<Self>),
|
||||
}
|
||||
|
||||
impl<'a> Filter<'a> {
|
||||
pub fn parse(expr: &'a str, schema: &'a Schema) -> FilterResult<'a> {
|
||||
let mut lexed = FilterParser::parse(Rule::prgm, expr)?;
|
||||
Self::build(lexed.next().unwrap().into_inner(), schema)
|
||||
}
|
||||
|
||||
pub fn test(
|
||||
&self,
|
||||
reader: &RoTxn<MainT>,
|
||||
index: &Index,
|
||||
document_id: DocumentId,
|
||||
) -> Result<bool, Error> {
|
||||
use Filter::*;
|
||||
match self {
|
||||
Condition(c) => c.test(reader, index, document_id),
|
||||
Or(lhs, rhs) => Ok(
|
||||
lhs.test(reader, index, document_id)? || rhs.test(reader, index, document_id)?
|
||||
),
|
||||
And(lhs, rhs) => Ok(
|
||||
lhs.test(reader, index, document_id)? && rhs.test(reader, index, document_id)?
|
||||
),
|
||||
Not(op) => op.test(reader, index, document_id).map(bool::not),
|
||||
}
|
||||
}
|
||||
|
||||
fn build(expression: Pairs<'a, Rule>, schema: &'a Schema) -> FilterResult<'a> {
|
||||
PREC_CLIMBER.climb(
|
||||
expression,
|
||||
|pair: Pair<Rule>| match pair.as_rule() {
|
||||
Rule::eq => Ok(Filter::Condition(Condition::eq(pair, schema)?)),
|
||||
Rule::greater => Ok(Filter::Condition(Condition::greater(pair, schema)?)),
|
||||
Rule::less => Ok(Filter::Condition(Condition::less(pair, schema)?)),
|
||||
Rule::neq => Ok(Filter::Condition(Condition::neq(pair, schema)?)),
|
||||
Rule::geq => Ok(Filter::Condition(Condition::geq(pair, schema)?)),
|
||||
Rule::leq => Ok(Filter::Condition(Condition::leq(pair, schema)?)),
|
||||
Rule::prgm => Self::build(pair.into_inner(), schema),
|
||||
Rule::term => Self::build(pair.into_inner(), schema),
|
||||
Rule::not => Ok(Filter::Not(Box::new(Self::build(
|
||||
pair.into_inner(),
|
||||
schema,
|
||||
)?))),
|
||||
_ => unreachable!(),
|
||||
},
|
||||
|lhs: FilterResult, op: Pair<Rule>, rhs: FilterResult| match op.as_rule() {
|
||||
Rule::or => Ok(Filter::Or(Box::new(lhs?), Box::new(rhs?))),
|
||||
Rule::and => Ok(Filter::And(Box::new(lhs?), Box::new(rhs?))),
|
||||
_ => unreachable!(),
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    /// Expressions the grammar must reject.
    #[test]
    fn invalid_syntax() {
        let rejected = [
            "field : id",
            "field=hello hello",
            "field=hello OR OR",
            "OR field:hello",
            r#"field="hello world"#,
            r#"field='hello world"#,
            "NOT field=",
            "N",
            "(field=1",
            "(field=1))",
            "field=1ORfield=2",
            "field=1 ( OR field=2)",
            "hello world=1",
            "",
            r#"((((((hello=world)))))"#,
        ];

        for input in rejected.iter() {
            assert!(
                FilterParser::parse(Rule::prgm, input).is_err(),
                "{:?} should be rejected",
                input
            );
        }
    }

    /// Expressions the grammar must accept.
    #[test]
    fn valid_syntax() {
        let accepted = [
            "field = id",
            "field=id",
            r#"field >= 10"#,
            r#"field <= 10"#,
            r#"field="hello world""#,
            r#"field='hello world'"#,
            r#"field > 10"#,
            r#"field < 10"#,
            r#"field < 10 AND NOT field=5"#,
            r#"field < 10 AND NOT field > 7.5"#,
            r#"field=true OR NOT field=5"#,
            r#"NOT field=true OR NOT field=5"#,
            r#"field='hello world' OR ( NOT field=true OR NOT field=5 )"#,
            r#"field='hello \'worl\'d' OR ( NOT field=true OR NOT field=5 )"#,
            r#"field="hello \"worl\"d" OR ( NOT field=true OR NOT field=5 )"#,
            r#"((((((hello=world))))))"#,
            r#""foo bar" > 10"#,
            r#""foo bar" = 10"#,
            r#"'foo bar' = 10"#,
            r#"'foo bar' <= 10"#,
            r#"'foo bar' != 10"#,
            r#"bar != 10"#,
        ];

        for input in accepted.iter() {
            assert!(
                FilterParser::parse(Rule::prgm, input).is_ok(),
                "{:?} should be accepted",
                input
            );
        }
    }
}
|
@ -1,28 +0,0 @@
|
||||
// Keys and values share the same shape: a quoted string or a bare word.
key = _{quoted | word}
value = _{quoted | word}
// The opening quote is pushed on the stack; POP requires the same quote to close.
quoted = _{ (PUSH("'") | PUSH("\"")) ~ string ~ POP }
string = {char*}
word = ${(LETTER | NUMBER | "_" | "-" | ".")+}

// Any character except the pending quote or a backslash, or an escape sequence.
char = _{ !(PEEK | "\\") ~ ANY
    | "\\" ~ (PEEK | "\\" | "/" | "b" | "f" | "n" | "r" | "t")
    | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4})}

// One comparison between a key and a value.
condition = _{eq | greater | less | geq | leq | neq}
geq = {key ~ ">=" ~ value}
leq = {key ~ "<=" ~ value}
neq = {key ~ "!=" ~ value}
eq = {key ~ "=" ~ value}
greater = {key ~ ">" ~ value}
less = {key ~ "<" ~ value}

// A whole filter: terms joined by AND / OR, with optional parentheses and NOT.
prgm = {SOI ~ expr ~ EOI}
expr = _{ ( term ~ (operation ~ term)* ) }
term = { ("(" ~ expr ~ ")") | condition | not }
operation = _{ and | or }
and = {"AND"}
or = {"OR"}

not = {"NOT" ~ term}

WHITESPACE = _{ " " }
|
@ -1,12 +0,0 @@
|
||||
use once_cell::sync::Lazy;
|
||||
use pest::prec_climber::{Operator, Assoc, PrecClimber};
|
||||
|
||||
pub static PREC_CLIMBER: Lazy<PrecClimber<Rule>> = Lazy::new(|| {
|
||||
use Assoc::*;
|
||||
use Rule::*;
|
||||
pest::prec_climber::PrecClimber::new(vec![Operator::new(or, Left), Operator::new(and, Left)])
|
||||
});
|
||||
|
||||
#[derive(Parser)]
|
||||
#[grammar = "filters/parser/grammar.pest"]
|
||||
pub struct FilterParser;
|
@ -1,134 +0,0 @@
|
||||
use std::cmp::min;
|
||||
use std::collections::BTreeMap;
|
||||
use std::ops::{Index, IndexMut};
|
||||
|
||||
/// A simple wrapper around a `Vec` so the cells are contiguous in memory but
/// can be indexed like a 2D array with an `(x, y)` pair.
struct N2Array<T> {
    /// Size of the second dimension, i.e. the number of cells per `x`.
    y_size: usize,
    /// Flat storage; cell `(x, y)` lives at `x * y_size + y`.
    buf: Vec<T>,
}

impl<T: Clone> N2Array<T> {
    /// Creates an `x` by `y` array with every cell set to `value`.
    fn new(x: usize, y: usize, value: T) -> N2Array<T> {
        N2Array {
            y_size: y,
            buf: vec![value; x * y],
        }
    }
}

impl<T> Index<(usize, usize)> for N2Array<T> {
    type Output = T;

    #[inline]
    fn index(&self, (x, y): (usize, usize)) -> &T {
        &self.buf[(x * self.y_size) + y]
    }
}

impl<T> IndexMut<(usize, usize)> for N2Array<T> {
    #[inline]
    fn index_mut(&mut self, (x, y): (usize, usize)) -> &mut T {
        &mut self.buf[(x * self.y_size) + y]
    }
}

/// Computes the smallest Damerau-Levenshtein distance between `source` and
/// any prefix of `target` that is at least as long as `source`.
///
/// Returns `(distance, length)` where `length` is the length, in bytes, of the
/// shortest prefix of `target` reaching that distance.
///
/// # Panics
///
/// Panics when `source` is longer than `target`.
pub fn prefix_damerau_levenshtein(source: &[u8], target: &[u8]) -> (u32, usize) {
    let (n, m) = (source.len(), target.len());

    assert!(
        n <= m,
        "the source string must not be longer than the target one"
    );

    // An empty source is reported at distance `m` with nothing matched.
    // Because `n <= m`, this also covers the case of an empty target.
    // NOTE(review): the table below would yield `(0, 0)` for an empty source;
    // the historical `(m, 0)` result is kept for callers.
    if n == 0 {
        return (m as u32, 0);
    }

    if n == m && source == target {
        return (0, m);
    }

    // Sentinel larger than any reachable distance.
    let inf = n + m;
    // The table has one extra leading row and column holding `inf`, so cell
    // `(row + 1, col + 1)` is the distance between the first `row` bytes of
    // `source` and the first `col` bytes of `target`.
    let mut matrix = N2Array::new(n + 2, m + 2, 0);

    matrix[(0, 0)] = inf;
    for i in 0..n + 1 {
        matrix[(i + 1, 0)] = inf;
        matrix[(i + 1, 1)] = i;
    }
    for j in 0..m + 1 {
        matrix[(0, j + 1)] = inf;
        matrix[(1, j + 1)] = j;
    }

    // Last row (1-based) at which each source byte has been seen so far.
    let mut last_row = BTreeMap::new();

    for (row, char_s) in source.iter().enumerate() {
        // Last column (1-based) of this row where the two bytes were equal.
        let mut last_match_col = 0;
        let row = row + 1;

        for (col, char_t) in target.iter().enumerate() {
            let col = col + 1;
            let last_match_row = *last_row.get(&char_t).unwrap_or(&0);
            let cost = if char_s == char_t { 0 } else { 1 };

            let dist_add = matrix[(row, col + 1)] + 1;
            let dist_del = matrix[(row + 1, col)] + 1;
            let dist_sub = matrix[(row, col)] + cost;
            // Transposition: jump back to the last matching cell and pay for
            // everything skipped in between, plus the swap itself.
            let dist_trans = matrix[(last_match_row, last_match_col)]
                + (row - last_match_row - 1)
                + 1
                + (col - last_match_col - 1);

            let dist = min(min(dist_add, dist_del), min(dist_sub, dist_trans));

            matrix[(row + 1, col + 1)] = dist;

            if cost == 0 {
                last_match_col = col;
            }
        }

        last_row.insert(char_s, row);
    }

    // Scan the last row over every prefix length from `n` to `m`; the strict
    // comparison keeps the shortest prefix among equal distances.
    let mut minimum = (u32::max_value(), 0);

    for x in n..=m {
        let dist = matrix[(n + 1, x + 1)] as u32;
        if dist < minimum.0 {
            minimum = (dist, x)
        }
    }

    minimum
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A query with one missing letter matches the corresponding prefix of
    /// the text at distance 1.
    #[test]
    fn matched_length() {
        let (query, text) = ("Levenste", "Levenshtein");

        let (dist, length) = prefix_damerau_levenshtein(query.as_bytes(), text.as_bytes());

        assert_eq!(dist, 1);
        assert_eq!(&text[..length], "Levenshte");
    }

    #[test]
    #[should_panic]
    fn matched_length_panic() {
        let (query, text) = ("Levenshtein", "Levenste");

        // this function will panic if source is longer than target
        prefix_damerau_levenshtein(query.as_bytes(), text.as_bytes());
    }
}
|
@ -1,203 +0,0 @@
|
||||
#![allow(clippy::type_complexity)]
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
extern crate assert_matches;
|
||||
#[macro_use]
|
||||
extern crate pest_derive;
|
||||
|
||||
mod automaton;
|
||||
mod bucket_sort;
|
||||
mod database;
|
||||
mod distinct_map;
|
||||
mod error;
|
||||
mod filters;
|
||||
mod levenshtein;
|
||||
mod number;
|
||||
mod query_builder;
|
||||
mod query_tree;
|
||||
mod query_words_mapper;
|
||||
mod ranked_map;
|
||||
mod raw_document;
|
||||
mod reordered_attrs;
|
||||
pub mod criterion;
|
||||
pub mod facets;
|
||||
pub mod raw_indexer;
|
||||
pub mod serde;
|
||||
pub mod settings;
|
||||
pub mod store;
|
||||
pub mod update;
|
||||
|
||||
pub use self::database::{BoxUpdateFn, Database, DatabaseOptions, MainT, UpdateT, MainWriter, MainReader, UpdateWriter, UpdateReader};
|
||||
pub use self::error::{Error, HeedError, FstError, MResult, pest_error, FacetError};
|
||||
pub use self::filters::Filter;
|
||||
pub use self::number::{Number, ParseNumberError};
|
||||
pub use self::ranked_map::RankedMap;
|
||||
pub use self::raw_document::RawDocument;
|
||||
pub use self::store::Index;
|
||||
pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus, UpdateType};
|
||||
pub use meilisearch_types::{DocIndex, DocumentId, Highlight};
|
||||
pub use meilisearch_schema::Schema;
|
||||
pub use query_words_mapper::QueryWordsMapper;
|
||||
pub use query_tree::MAX_QUERY_LEN;
|
||||
|
||||
use compact_arena::SmallArena;
|
||||
use log::{error, trace};
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryFrom;
|
||||
|
||||
use crate::bucket_sort::PostingsListView;
|
||||
use crate::levenshtein::prefix_damerau_levenshtein;
|
||||
use crate::query_tree::{QueryId, QueryKind};
|
||||
use crate::reordered_attrs::ReorderedAttrs;
|
||||
|
||||
type FstSetCow<'a> = fst::Set<Cow<'a, [u8]>>;
|
||||
type FstMapCow<'a> = fst::Map<Cow<'a, [u8]>>;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Document {
|
||||
pub id: DocumentId,
|
||||
pub highlights: Vec<Highlight>,
|
||||
|
||||
#[cfg(test)]
|
||||
pub matches: Vec<crate::bucket_sort::SimpleMatch>,
|
||||
}
|
||||
|
||||
fn highlights_from_raw_document<'a, 'tag, 'txn>(
|
||||
raw_document: &RawDocument<'a, 'tag>,
|
||||
queries_kinds: &HashMap<QueryId, &QueryKind>,
|
||||
arena: &SmallArena<'tag, PostingsListView<'txn>>,
|
||||
searchable_attrs: Option<&ReorderedAttrs>,
|
||||
schema: &Schema,
|
||||
) -> Vec<Highlight>
|
||||
{
|
||||
let mut highlights = Vec::new();
|
||||
|
||||
for bm in raw_document.bare_matches.iter() {
|
||||
let postings_list = &arena[bm.postings_list];
|
||||
let input = postings_list.input();
|
||||
let kind = &queries_kinds.get(&bm.query_index);
|
||||
|
||||
for di in postings_list.iter() {
|
||||
let covered_area = match kind {
|
||||
Some(QueryKind::NonTolerant(query)) | Some(QueryKind::Tolerant(query)) => {
|
||||
let len = if query.len() > input.len() {
|
||||
input.len()
|
||||
} else {
|
||||
prefix_damerau_levenshtein(query.as_bytes(), input).1
|
||||
};
|
||||
u16::try_from(len).unwrap_or(u16::max_value())
|
||||
},
|
||||
_ => di.char_length,
|
||||
};
|
||||
|
||||
let attribute = searchable_attrs
|
||||
.and_then(|sa| sa.reverse(di.attribute))
|
||||
.unwrap_or(di.attribute);
|
||||
|
||||
let attribute = match schema.indexed_pos_to_field_id(attribute) {
|
||||
Some(field_id) => field_id.0,
|
||||
None => {
|
||||
error!("Cannot convert indexed_pos {} to field_id", attribute);
|
||||
trace!("Schema is compromized; {:?}", schema);
|
||||
continue
|
||||
}
|
||||
};
|
||||
|
||||
let highlight = Highlight {
|
||||
attribute,
|
||||
char_index: di.char_index,
|
||||
char_length: covered_area,
|
||||
};
|
||||
|
||||
highlights.push(highlight);
|
||||
}
|
||||
}
|
||||
|
||||
highlights
|
||||
}
|
||||
|
||||
impl Document {
|
||||
#[cfg(not(test))]
|
||||
pub fn from_highlights(id: DocumentId, highlights: &[Highlight]) -> Document {
|
||||
Document { id, highlights: highlights.to_owned() }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn from_highlights(id: DocumentId, highlights: &[Highlight]) -> Document {
|
||||
Document { id, highlights: highlights.to_owned(), matches: Vec::new() }
|
||||
}
|
||||
|
||||
#[cfg(not(test))]
|
||||
pub fn from_raw<'a, 'tag, 'txn>(
|
||||
raw_document: RawDocument<'a, 'tag>,
|
||||
queries_kinds: &HashMap<QueryId, &QueryKind>,
|
||||
arena: &SmallArena<'tag, PostingsListView<'txn>>,
|
||||
searchable_attrs: Option<&ReorderedAttrs>,
|
||||
schema: &Schema,
|
||||
) -> Document
|
||||
{
|
||||
let highlights = highlights_from_raw_document(
|
||||
&raw_document,
|
||||
queries_kinds,
|
||||
arena,
|
||||
searchable_attrs,
|
||||
schema,
|
||||
);
|
||||
|
||||
Document { id: raw_document.id, highlights }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn from_raw<'a, 'tag, 'txn>(
|
||||
raw_document: RawDocument<'a, 'tag>,
|
||||
queries_kinds: &HashMap<QueryId, &QueryKind>,
|
||||
arena: &SmallArena<'tag, PostingsListView<'txn>>,
|
||||
searchable_attrs: Option<&ReorderedAttrs>,
|
||||
schema: &Schema,
|
||||
) -> Document
|
||||
{
|
||||
use crate::bucket_sort::SimpleMatch;
|
||||
|
||||
let highlights = highlights_from_raw_document(
|
||||
&raw_document,
|
||||
queries_kinds,
|
||||
arena,
|
||||
searchable_attrs,
|
||||
schema,
|
||||
);
|
||||
|
||||
let mut matches = Vec::new();
|
||||
for sm in raw_document.processed_matches {
|
||||
let attribute = searchable_attrs
|
||||
.and_then(|sa| sa.reverse(sm.attribute))
|
||||
.unwrap_or(sm.attribute);
|
||||
|
||||
let attribute = match schema.indexed_pos_to_field_id(attribute) {
|
||||
Some(field_id) => field_id.0,
|
||||
None => {
|
||||
error!("Cannot convert indexed_pos {} to field_id", attribute);
|
||||
trace!("Schema is compromized; {:?}", schema);
|
||||
continue
|
||||
}
|
||||
};
|
||||
|
||||
matches.push(SimpleMatch { attribute, ..sm });
|
||||
}
|
||||
matches.sort_unstable();
|
||||
|
||||
Document { id: raw_document.id, highlights, matches }
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::mem;

    /// Guards the in-memory size of `DocIndex`, expected to stay at 12 bytes.
    #[test]
    fn docindex_mem_size() {
        let size = mem::size_of::<DocIndex>();
        assert_eq!(size, 12);
    }
}
|
@ -1,120 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::fmt;
|
||||
use std::num::{ParseFloatError, ParseIntError};
|
||||
use std::str::FromStr;
|
||||
|
||||
use ordered_float::OrderedFloat;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Copy, Clone)]
|
||||
pub enum Number {
|
||||
Unsigned(u64),
|
||||
Signed(i64),
|
||||
Float(OrderedFloat<f64>),
|
||||
Null,
|
||||
}
|
||||
|
||||
impl Default for Number {
|
||||
fn default() -> Self {
|
||||
Self::Null
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for Number {
|
||||
type Err = ParseNumberError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
let uint_error = match u64::from_str(s) {
|
||||
Ok(unsigned) => return Ok(Number::Unsigned(unsigned)),
|
||||
Err(error) => error,
|
||||
};
|
||||
|
||||
let int_error = match i64::from_str(s) {
|
||||
Ok(signed) => return Ok(Number::Signed(signed)),
|
||||
Err(error) => error,
|
||||
};
|
||||
|
||||
let float_error = match f64::from_str(s) {
|
||||
Ok(float) => return Ok(Number::Float(OrderedFloat(float))),
|
||||
Err(error) => error,
|
||||
};
|
||||
|
||||
Err(ParseNumberError {
|
||||
uint_error,
|
||||
int_error,
|
||||
float_error,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for Number {
|
||||
fn eq(&self, other: &Number) -> bool {
|
||||
self.cmp(other) == Ordering::Equal
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for Number {}
|
||||
|
||||
impl PartialOrd for Number {
|
||||
fn partial_cmp(&self, other: &Number) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for Number {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
use Number::{Float, Signed, Unsigned, Null};
|
||||
|
||||
match (*self, *other) {
|
||||
(Unsigned(a), Unsigned(b)) => a.cmp(&b),
|
||||
(Unsigned(a), Signed(b)) => {
|
||||
if b < 0 {
|
||||
Ordering::Greater
|
||||
} else {
|
||||
a.cmp(&(b as u64))
|
||||
}
|
||||
}
|
||||
(Unsigned(a), Float(b)) => (OrderedFloat(a as f64)).cmp(&b),
|
||||
(Signed(a), Unsigned(b)) => {
|
||||
if a < 0 {
|
||||
Ordering::Less
|
||||
} else {
|
||||
(a as u64).cmp(&b)
|
||||
}
|
||||
}
|
||||
(Signed(a), Signed(b)) => a.cmp(&b),
|
||||
(Signed(a), Float(b)) => OrderedFloat(a as f64).cmp(&b),
|
||||
(Float(a), Unsigned(b)) => a.cmp(&OrderedFloat(b as f64)),
|
||||
(Float(a), Signed(b)) => a.cmp(&OrderedFloat(b as f64)),
|
||||
(Float(a), Float(b)) => a.cmp(&b),
|
||||
(Null, Null) => Ordering::Equal,
|
||||
(_, Null) => Ordering::Less,
|
||||
(Null, _) => Ordering::Greater,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Error returned when a string cannot be parsed as a `Number`; it keeps the
/// error of each attempted representation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseNumberError {
    /// Error of the `u64` attempt.
    uint_error: ParseIntError,
    /// Error of the `i64` attempt.
    int_error: ParseIntError,
    /// Error of the `f64` attempt.
    float_error: ParseFloatError,
}
|
||||
|
||||
impl fmt::Display for ParseNumberError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
if self.uint_error == self.int_error {
|
||||
write!(
|
||||
f,
|
||||
"can not parse number: {}, {}",
|
||||
self.uint_error, self.float_error
|
||||
)
|
||||
} else {
|
||||
write!(
|
||||
f,
|
||||
"can not parse number: {}, {}, {}",
|
||||
self.uint_error, self.int_error, self.float_error
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -1,573 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::ops::Range;
|
||||
use std::time::Instant;
|
||||
use std::{cmp, fmt, iter::once};
|
||||
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use itertools::{EitherOrBoth, merge_join_by};
|
||||
use log::debug;
|
||||
use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig};
|
||||
use sdset::{Set, SetBuf, SetOperation};
|
||||
|
||||
use crate::database::MainT;
|
||||
use crate::{store, DocumentId, DocIndex, MResult, FstSetCow};
|
||||
use crate::automaton::{build_dfa, build_prefix_dfa, build_exact_dfa};
|
||||
use crate::QueryWordsMapper;
|
||||
|
||||
/// Maximum number of word tokens kept from a query string
/// (see `split_query_string`, which truncates with `.take(MAX_QUERY_LEN)`).
pub const MAX_QUERY_LEN: usize = 10;
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub enum Operation {
|
||||
And(Vec<Operation>),
|
||||
Or(Vec<Operation>),
|
||||
Query(Query),
|
||||
}
|
||||
|
||||
impl fmt::Debug for Operation {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn pprint_tree(f: &mut fmt::Formatter<'_>, op: &Operation, depth: usize) -> fmt::Result {
|
||||
match op {
|
||||
Operation::And(children) => {
|
||||
writeln!(f, "{:1$}AND", "", depth * 2)?;
|
||||
children.iter().try_for_each(|c| pprint_tree(f, c, depth + 1))
|
||||
},
|
||||
Operation::Or(children) => {
|
||||
writeln!(f, "{:1$}OR", "", depth * 2)?;
|
||||
children.iter().try_for_each(|c| pprint_tree(f, c, depth + 1))
|
||||
},
|
||||
Operation::Query(query) => writeln!(f, "{:2$}{:?}", "", query, depth * 2),
|
||||
}
|
||||
}
|
||||
|
||||
pprint_tree(f, self, 0)
|
||||
}
|
||||
}
|
||||
|
||||
impl Operation {
|
||||
fn tolerant(id: QueryId, prefix: bool, s: &str) -> Operation {
|
||||
Operation::Query(Query { id, prefix, exact: true, kind: QueryKind::Tolerant(s.to_string()) })
|
||||
}
|
||||
|
||||
fn non_tolerant(id: QueryId, prefix: bool, s: &str) -> Operation {
|
||||
Operation::Query(Query { id, prefix, exact: true, kind: QueryKind::NonTolerant(s.to_string()) })
|
||||
}
|
||||
|
||||
fn phrase2(id: QueryId, prefix: bool, (left, right): (&str, &str)) -> Operation {
|
||||
let kind = QueryKind::Phrase(vec![left.to_owned(), right.to_owned()]);
|
||||
Operation::Query(Query { id, prefix, exact: true, kind })
|
||||
}
|
||||
}
|
||||
|
||||
/// Identifier attached to every `Query` of the query tree.
pub type QueryId = usize;
|
||||
|
||||
#[derive(Clone, Eq)]
|
||||
pub struct Query {
|
||||
pub id: QueryId,
|
||||
pub prefix: bool,
|
||||
pub exact: bool,
|
||||
pub kind: QueryKind,
|
||||
}
|
||||
|
||||
impl PartialEq for Query {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.prefix == other.prefix && self.kind == other.kind
|
||||
}
|
||||
}
|
||||
|
||||
impl Hash for Query {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.prefix.hash(state);
|
||||
self.kind.hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
/// The different ways a `Query` can be matched against the index.
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum QueryKind {
    /// A single word searched with the regular (or prefix) DFA.
    Tolerant(String),
    /// A single word searched with the exact DFA.
    NonTolerant(String),
    /// A sequence of words that must follow each other.
    Phrase(Vec<String>),
}
|
||||
|
||||
impl fmt::Debug for Query {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let Query { id, prefix, kind, .. } = self;
|
||||
let prefix = if *prefix { String::from("Prefix") } else { String::default() };
|
||||
match kind {
|
||||
QueryKind::NonTolerant(word) => {
|
||||
f.debug_struct(&(prefix + "NonTolerant")).field("id", &id).field("word", &word).finish()
|
||||
},
|
||||
QueryKind::Tolerant(word) => {
|
||||
f.debug_struct(&(prefix + "Tolerant")).field("id", &id).field("word", &word).finish()
|
||||
},
|
||||
QueryKind::Phrase(words) => {
|
||||
f.debug_struct(&(prefix + "Phrase")).field("id", &id).field("words", &words).finish()
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct PostingsList {
|
||||
docids: SetBuf<DocumentId>,
|
||||
matches: SetBuf<DocIndex>,
|
||||
}
|
||||
|
||||
pub struct Context<'a> {
|
||||
pub words_set: FstSetCow<'a>,
|
||||
pub stop_words: FstSetCow<'a>,
|
||||
pub synonyms: store::Synonyms,
|
||||
pub postings_lists: store::PostingsLists,
|
||||
pub prefix_postings_lists: store::PrefixPostingsListsCache,
|
||||
}
|
||||
|
||||
fn split_best_frequency<'a>(reader: &heed::RoTxn<MainT>, ctx: &Context, word: &'a str) -> MResult<Option<(&'a str, &'a str)>> {
|
||||
let chars = word.char_indices().skip(1);
|
||||
let mut best = None;
|
||||
|
||||
for (i, _) in chars {
|
||||
let (left, right) = word.split_at(i);
|
||||
|
||||
let left_freq = ctx.postings_lists
|
||||
.postings_list(reader, left.as_bytes())?
|
||||
.map(|p| p.docids.len())
|
||||
.unwrap_or(0);
|
||||
let right_freq = ctx.postings_lists
|
||||
.postings_list(reader, right.as_bytes())?
|
||||
.map(|p| p.docids.len())
|
||||
.unwrap_or(0);
|
||||
|
||||
let min_freq = cmp::min(left_freq, right_freq);
|
||||
if min_freq != 0 && best.map_or(true, |(old, _, _)| min_freq > old) {
|
||||
best = Some((min_freq, left, right));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(best.map(|(_, l, r)| (l, r)))
|
||||
}
|
||||
|
||||
/// Returns the synonyms registered for `words` (joined with single spaces).
///
/// Each synonym is returned as the list of its whitespace-separated words.
/// Entries that are not valid UTF-8 are silently skipped.
fn fetch_synonyms(reader: &heed::RoTxn<MainT>, ctx: &Context, words: &[&str]) -> MResult<Vec<Vec<String>>> {
    let joined = words.join(" ");
    let set = ctx.synonyms.synonyms_fst(reader, joined.as_bytes())?;

    let mut synonyms = Vec::new();
    let mut stream = set.stream();
    while let Some(bytes) = stream.next() {
        match std::str::from_utf8(bytes) {
            Ok(text) => {
                let alternative = text.split_ascii_whitespace().map(ToOwned::to_owned).collect();
                synonyms.push(alternative);
            },
            Err(_) => (),
        }
    }

    Ok(synonyms)
}
|
||||
|
||||
fn create_operation<I, F>(iter: I, f: F) -> Operation
|
||||
where I: IntoIterator<Item=Operation>,
|
||||
F: Fn(Vec<Operation>) -> Operation,
|
||||
{
|
||||
let mut iter = iter.into_iter();
|
||||
match (iter.next(), iter.next()) {
|
||||
(Some(first), None) => first,
|
||||
(first, second) => f(first.into_iter().chain(second).chain(iter).collect()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Maximum number of consecutive query words grouped together when
/// building the query tree (`create_query_tree` iterates `1..=MAX_NGRAM`).
const MAX_NGRAM: usize = 3;
|
||||
|
||||
fn split_query_string<A: AsRef<[u8]>>(s: &str, stop_words: &fst::Set<A>) -> Vec<(usize, String)> {
|
||||
// TODO: Use global instance instead
|
||||
Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words))
|
||||
.analyze(s)
|
||||
.tokens()
|
||||
.filter(|t| t.is_word())
|
||||
.map(|t| t.word.to_string())
|
||||
.take(MAX_QUERY_LEN)
|
||||
.enumerate()
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn create_query_tree(
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
ctx: &Context,
|
||||
query: &str,
|
||||
) -> MResult<(Operation, HashMap<QueryId, Range<usize>>)>
|
||||
{
|
||||
// TODO: use a shared analyzer instance
|
||||
let words = split_query_string(query, &ctx.stop_words);
|
||||
|
||||
let mut mapper = QueryWordsMapper::new(words.iter().map(|(_, w)| w));
|
||||
|
||||
fn create_inner(
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
ctx: &Context,
|
||||
mapper: &mut QueryWordsMapper,
|
||||
words: &[(usize, String)],
|
||||
) -> MResult<Vec<Operation>>
|
||||
{
|
||||
let mut alts = Vec::new();
|
||||
|
||||
for ngram in 1..=MAX_NGRAM {
|
||||
if let Some(group) = words.get(..ngram) {
|
||||
let mut group_ops = Vec::new();
|
||||
|
||||
let tail = &words[ngram..];
|
||||
let is_last = tail.is_empty();
|
||||
|
||||
let mut group_alts = Vec::new();
|
||||
match group {
|
||||
[(id, word)] => {
|
||||
let mut idgen = ((id + 1) * 100)..;
|
||||
let range = (*id)..id+1;
|
||||
|
||||
let phrase = split_best_frequency(reader, ctx, word)?
|
||||
.map(|ws| {
|
||||
let id = idgen.next().unwrap();
|
||||
idgen.next().unwrap();
|
||||
mapper.declare(range.clone(), id, &[ws.0, ws.1]);
|
||||
Operation::phrase2(id, is_last, ws)
|
||||
});
|
||||
|
||||
let synonyms = fetch_synonyms(reader, ctx, &[word])?
|
||||
.into_iter()
|
||||
.map(|alts| {
|
||||
let exact = alts.len() == 1;
|
||||
let id = idgen.next().unwrap();
|
||||
mapper.declare(range.clone(), id, &alts);
|
||||
|
||||
let mut idgen = once(id).chain(&mut idgen);
|
||||
let iter = alts.into_iter().map(|w| {
|
||||
let id = idgen.next().unwrap();
|
||||
let kind = QueryKind::NonTolerant(w);
|
||||
Operation::Query(Query { id, prefix: false, exact, kind })
|
||||
});
|
||||
|
||||
create_operation(iter, Operation::And)
|
||||
});
|
||||
|
||||
let original = Operation::tolerant(*id, is_last, word);
|
||||
|
||||
group_alts.push(original);
|
||||
group_alts.extend(synonyms.chain(phrase));
|
||||
},
|
||||
words => {
|
||||
let id = words[0].0;
|
||||
let mut idgen = ((id + 1) * 100_usize.pow(ngram as u32))..;
|
||||
let range = id..id+ngram;
|
||||
|
||||
let words: Vec<_> = words.iter().map(|(_, s)| s.as_str()).collect();
|
||||
|
||||
for synonym in fetch_synonyms(reader, ctx, &words)? {
|
||||
let exact = synonym.len() == 1;
|
||||
let id = idgen.next().unwrap();
|
||||
mapper.declare(range.clone(), id, &synonym);
|
||||
|
||||
let mut idgen = once(id).chain(&mut idgen);
|
||||
let synonym = synonym.into_iter().map(|s| {
|
||||
let id = idgen.next().unwrap();
|
||||
let kind = QueryKind::NonTolerant(s);
|
||||
Operation::Query(Query { id, prefix: false, exact, kind })
|
||||
});
|
||||
group_alts.push(create_operation(synonym, Operation::And));
|
||||
}
|
||||
|
||||
let id = idgen.next().unwrap();
|
||||
let concat = words.concat();
|
||||
mapper.declare(range.clone(), id, &[&concat]);
|
||||
group_alts.push(Operation::non_tolerant(id, is_last, &concat));
|
||||
}
|
||||
}
|
||||
|
||||
group_ops.push(create_operation(group_alts, Operation::Or));
|
||||
|
||||
if !tail.is_empty() {
|
||||
let tail_ops = create_inner(reader, ctx, mapper, tail)?;
|
||||
group_ops.push(create_operation(tail_ops, Operation::Or));
|
||||
}
|
||||
|
||||
alts.push(create_operation(group_ops, Operation::And));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(alts)
|
||||
}
|
||||
|
||||
let alternatives = create_inner(reader, ctx, &mut mapper, &words)?;
|
||||
let operation = Operation::Or(alternatives);
|
||||
let mapping = mapper.mapping();
|
||||
|
||||
Ok((operation, mapping))
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct PostingsKey<'o> {
|
||||
pub query: &'o Query,
|
||||
pub input: Vec<u8>,
|
||||
pub distance: u8,
|
||||
pub is_exact: bool,
|
||||
}
|
||||
|
||||
/// Matches collected while executing the queries of a tree, keyed by the
/// query and the matched input that produced them.
pub type Postings<'o, 'txn> = HashMap<PostingsKey<'o>, Cow<'txn, Set<DocIndex>>>;
|
||||
/// Document ids already computed for an operation of the tree, so that an
/// operation appearing several times is only executed once.
pub type Cache<'o, 'txn> = HashMap<&'o Operation, Cow<'txn, Set<DocumentId>>>;
|
||||
|
||||
pub struct QueryResult<'o, 'txn> {
|
||||
pub docids: Cow<'txn, Set<DocumentId>>,
|
||||
pub queries: Postings<'o, 'txn>,
|
||||
}
|
||||
|
||||
pub fn traverse_query_tree<'o, 'txn>(
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
ctx: &Context,
|
||||
tree: &'o Operation,
|
||||
) -> MResult<QueryResult<'o, 'txn>>
|
||||
{
|
||||
fn execute_and<'o, 'txn>(
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
ctx: &Context,
|
||||
cache: &mut Cache<'o, 'txn>,
|
||||
postings: &mut Postings<'o, 'txn>,
|
||||
depth: usize,
|
||||
operations: &'o [Operation],
|
||||
) -> MResult<Cow<'txn, Set<DocumentId>>>
|
||||
{
|
||||
debug!("{:1$}AND", "", depth * 2);
|
||||
|
||||
let before = Instant::now();
|
||||
let mut results = Vec::new();
|
||||
|
||||
for op in operations {
|
||||
if cache.get(op).is_none() {
|
||||
let docids = match op {
|
||||
Operation::And(ops) => execute_and(reader, ctx, cache, postings, depth + 1, &ops)?,
|
||||
Operation::Or(ops) => execute_or(reader, ctx, cache, postings, depth + 1, &ops)?,
|
||||
Operation::Query(query) => execute_query(reader, ctx, postings, depth + 1, &query)?,
|
||||
};
|
||||
cache.insert(op, docids);
|
||||
}
|
||||
}
|
||||
|
||||
for op in operations {
|
||||
if let Some(docids) = cache.get(op) {
|
||||
results.push(docids.as_ref());
|
||||
}
|
||||
}
|
||||
|
||||
let op = sdset::multi::Intersection::new(results);
|
||||
let docids = op.into_set_buf();
|
||||
|
||||
debug!("{:3$}--- AND fetched {} documents in {:.02?}", "", docids.len(), before.elapsed(), depth * 2);
|
||||
|
||||
Ok(Cow::Owned(docids))
|
||||
}
|
||||
|
||||
fn execute_or<'o, 'txn>(
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
ctx: &Context,
|
||||
cache: &mut Cache<'o, 'txn>,
|
||||
postings: &mut Postings<'o, 'txn>,
|
||||
depth: usize,
|
||||
operations: &'o [Operation],
|
||||
) -> MResult<Cow<'txn, Set<DocumentId>>>
|
||||
{
|
||||
debug!("{:1$}OR", "", depth * 2);
|
||||
|
||||
let before = Instant::now();
|
||||
let mut results = Vec::new();
|
||||
|
||||
for op in operations {
|
||||
if cache.get(op).is_none() {
|
||||
let docids = match op {
|
||||
Operation::And(ops) => execute_and(reader, ctx, cache, postings, depth + 1, &ops)?,
|
||||
Operation::Or(ops) => execute_or(reader, ctx, cache, postings, depth + 1, &ops)?,
|
||||
Operation::Query(query) => execute_query(reader, ctx, postings, depth + 1, &query)?,
|
||||
};
|
||||
cache.insert(op, docids);
|
||||
}
|
||||
}
|
||||
|
||||
for op in operations {
|
||||
if let Some(docids) = cache.get(op) {
|
||||
results.push(docids.as_ref());
|
||||
}
|
||||
}
|
||||
|
||||
let op = sdset::multi::Union::new(results);
|
||||
let docids = op.into_set_buf();
|
||||
|
||||
debug!("{:3$}--- OR fetched {} documents in {:.02?}", "", docids.len(), before.elapsed(), depth * 2);
|
||||
|
||||
Ok(Cow::Owned(docids))
|
||||
}
|
||||
|
||||
fn execute_query<'o, 'txn>(
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
ctx: &Context,
|
||||
postings: &mut Postings<'o, 'txn>,
|
||||
depth: usize,
|
||||
query: &'o Query,
|
||||
) -> MResult<Cow<'txn, Set<DocumentId>>>
|
||||
{
|
||||
let before = Instant::now();
|
||||
|
||||
let Query { prefix, kind, exact, .. } = query;
|
||||
let docids: Cow<Set<_>> = match kind {
|
||||
QueryKind::Tolerant(word) => {
|
||||
if *prefix && word.len() <= 2 {
|
||||
let prefix = {
|
||||
let mut array = [0; 4];
|
||||
let bytes = word.as_bytes();
|
||||
array[..bytes.len()].copy_from_slice(bytes);
|
||||
array
|
||||
};
|
||||
|
||||
// We retrieve the cached postings lists for all
|
||||
// the words that starts with this short prefix.
|
||||
let result = ctx.prefix_postings_lists.prefix_postings_list(reader, prefix)?.unwrap_or_default();
|
||||
let key = PostingsKey { query, input: word.clone().into_bytes(), distance: 0, is_exact: false };
|
||||
postings.insert(key, result.matches);
|
||||
let prefix_docids = &result.docids;
|
||||
|
||||
// We retrieve the exact postings list for the prefix,
|
||||
// because we must consider these matches as exact.
|
||||
let result = ctx.postings_lists.postings_list(reader, word.as_bytes())?.unwrap_or_default();
|
||||
let key = PostingsKey { query, input: word.clone().into_bytes(), distance: 0, is_exact: true };
|
||||
postings.insert(key, result.matches);
|
||||
let exact_docids = &result.docids;
|
||||
|
||||
let before = Instant::now();
|
||||
let docids = sdset::duo::Union::new(prefix_docids, exact_docids).into_set_buf();
|
||||
debug!("{:4$}prefix docids ({} and {}) construction took {:.02?}",
|
||||
"", prefix_docids.len(), exact_docids.len(), before.elapsed(), depth * 2);
|
||||
|
||||
Cow::Owned(docids)
|
||||
|
||||
} else {
|
||||
let dfa = if *prefix { build_prefix_dfa(word) } else { build_dfa(word) };
|
||||
|
||||
let byte = word.as_bytes()[0];
|
||||
let mut stream = if byte == u8::max_value() {
|
||||
ctx.words_set.search(&dfa).ge(&[byte]).into_stream()
|
||||
} else {
|
||||
ctx.words_set.search(&dfa).ge(&[byte]).lt(&[byte + 1]).into_stream()
|
||||
};
|
||||
|
||||
let before = Instant::now();
|
||||
let mut results = Vec::new();
|
||||
while let Some(input) = stream.next() {
|
||||
if let Some(result) = ctx.postings_lists.postings_list(reader, input)? {
|
||||
let distance = dfa.eval(input).to_u8();
|
||||
let is_exact = *exact && distance == 0 && input.len() == word.len();
|
||||
results.push(result.docids);
|
||||
let key = PostingsKey { query, input: input.to_owned(), distance, is_exact };
|
||||
postings.insert(key, result.matches);
|
||||
}
|
||||
}
|
||||
debug!("{:3$}docids retrieval ({:?}) took {:.02?}", "", results.len(), before.elapsed(), depth * 2);
|
||||
|
||||
let before = Instant::now();
|
||||
let docids = if results.len() > 10 {
|
||||
let cap = results.iter().map(|dis| dis.len()).sum();
|
||||
let mut docids = Vec::with_capacity(cap);
|
||||
for dis in results {
|
||||
docids.extend_from_slice(&dis);
|
||||
}
|
||||
SetBuf::from_dirty(docids)
|
||||
} else {
|
||||
let sets = results.iter().map(AsRef::as_ref).collect();
|
||||
sdset::multi::Union::new(sets).into_set_buf()
|
||||
};
|
||||
debug!("{:2$}docids construction took {:.02?}", "", before.elapsed(), depth * 2);
|
||||
|
||||
Cow::Owned(docids)
|
||||
}
|
||||
},
|
||||
QueryKind::NonTolerant(word) => {
|
||||
// TODO support prefix and non-prefix exact DFA
|
||||
let dfa = build_exact_dfa(word);
|
||||
|
||||
let byte = word.as_bytes()[0];
|
||||
let mut stream = if byte == u8::max_value() {
|
||||
ctx.words_set.search(&dfa).ge(&[byte]).into_stream()
|
||||
} else {
|
||||
ctx.words_set.search(&dfa).ge(&[byte]).lt(&[byte + 1]).into_stream()
|
||||
};
|
||||
|
||||
let before = Instant::now();
|
||||
let mut results = Vec::new();
|
||||
while let Some(input) = stream.next() {
|
||||
if let Some(result) = ctx.postings_lists.postings_list(reader, input)? {
|
||||
let distance = dfa.eval(input).to_u8();
|
||||
results.push(result.docids);
|
||||
let key = PostingsKey { query, input: input.to_owned(), distance, is_exact: *exact };
|
||||
postings.insert(key, result.matches);
|
||||
}
|
||||
}
|
||||
debug!("{:3$}docids retrieval ({:?}) took {:.02?}", "", results.len(), before.elapsed(), depth * 2);
|
||||
|
||||
let before = Instant::now();
|
||||
let docids = if results.len() > 10 {
|
||||
let cap = results.iter().map(|dis| dis.len()).sum();
|
||||
let mut docids = Vec::with_capacity(cap);
|
||||
for dis in results {
|
||||
docids.extend_from_slice(&dis);
|
||||
}
|
||||
SetBuf::from_dirty(docids)
|
||||
} else {
|
||||
let sets = results.iter().map(AsRef::as_ref).collect();
|
||||
sdset::multi::Union::new(sets).into_set_buf()
|
||||
};
|
||||
debug!("{:2$}docids construction took {:.02?}", "", before.elapsed(), depth * 2);
|
||||
|
||||
Cow::Owned(docids)
|
||||
},
|
||||
QueryKind::Phrase(words) => {
|
||||
// TODO support prefix and non-prefix exact DFA
|
||||
if let [first, second] = words.as_slice() {
|
||||
let first = ctx.postings_lists.postings_list(reader, first.as_bytes())?.unwrap_or_default();
|
||||
let second = ctx.postings_lists.postings_list(reader, second.as_bytes())?.unwrap_or_default();
|
||||
|
||||
let iter = merge_join_by(first.matches.as_slice(), second.matches.as_slice(), |a, b| {
|
||||
let x = (a.document_id, a.attribute, (a.word_index as u32) + 1);
|
||||
let y = (b.document_id, b.attribute, b.word_index as u32);
|
||||
x.cmp(&y)
|
||||
});
|
||||
|
||||
let matches: Vec<_> = iter
|
||||
.filter_map(EitherOrBoth::both)
|
||||
.flat_map(|(a, b)| once(*a).chain(Some(*b)))
|
||||
.collect();
|
||||
|
||||
let before = Instant::now();
|
||||
let mut docids: Vec<_> = matches.iter().map(|m| m.document_id).collect();
|
||||
docids.dedup();
|
||||
let docids = SetBuf::new(docids).unwrap();
|
||||
debug!("{:2$}docids construction took {:.02?}", "", before.elapsed(), depth * 2);
|
||||
|
||||
let matches = Cow::Owned(SetBuf::from_dirty(matches));
|
||||
let key = PostingsKey { query, input: vec![], distance: 0, is_exact: true };
|
||||
postings.insert(key, matches);
|
||||
|
||||
Cow::Owned(docids)
|
||||
} else {
|
||||
debug!("{:2$}{:?} skipped", "", words, depth * 2);
|
||||
Cow::default()
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
debug!("{:4$}{:?} fetched {:?} documents in {:.02?}", "", query, docids.len(), before.elapsed(), depth * 2);
|
||||
Ok(docids)
|
||||
}
|
||||
|
||||
let mut cache = Cache::new();
|
||||
let mut postings = Postings::new();
|
||||
|
||||
let docids = match tree {
|
||||
Operation::And(ops) => execute_and(reader, ctx, &mut cache, &mut postings, 0, &ops)?,
|
||||
Operation::Or(ops) => execute_or(reader, ctx, &mut cache, &mut postings, 0, &ops)?,
|
||||
Operation::Query(query) => execute_query(reader, ctx, &mut postings, 0, &query)?,
|
||||
};
|
||||
|
||||
Ok(QueryResult { docids, queries: postings })
|
||||
}
|
@ -1,416 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
use std::iter::FromIterator;
|
||||
use std::ops::Range;
|
||||
use intervaltree::{Element, IntervalTree};
|
||||
|
||||
/// Identifier of an original query word or of a declared replacement word.
pub type QueryId = usize;
|
||||
|
||||
pub struct QueryWordsMapper {
|
||||
originals: Vec<String>,
|
||||
mappings: HashMap<QueryId, (Range<usize>, Vec<String>)>,
|
||||
}
|
||||
|
||||
impl QueryWordsMapper {
|
||||
pub fn new<I, A>(originals: I) -> QueryWordsMapper
|
||||
where I: IntoIterator<Item = A>,
|
||||
A: ToString,
|
||||
{
|
||||
let originals = originals.into_iter().map(|s| s.to_string()).collect();
|
||||
QueryWordsMapper { originals, mappings: HashMap::new() }
|
||||
}
|
||||
|
||||
#[allow(clippy::len_zero)]
|
||||
pub fn declare<I, A>(&mut self, range: Range<usize>, id: QueryId, replacement: I)
|
||||
where I: IntoIterator<Item = A>,
|
||||
A: ToString,
|
||||
{
|
||||
assert!(range.len() != 0);
|
||||
assert!(self.originals.get(range.clone()).is_some());
|
||||
assert!(id >= self.originals.len());
|
||||
|
||||
let replacement: Vec<_> = replacement.into_iter().map(|s| s.to_string()).collect();
|
||||
|
||||
assert!(!replacement.is_empty());
|
||||
|
||||
// We detect words at the end and at the front of the
|
||||
// replacement that are common with the originals:
|
||||
//
|
||||
// x a b c d e f g
|
||||
// ^^^/ \^^^
|
||||
// a b x c d k j e f
|
||||
// ^^^ ^^^
|
||||
//
|
||||
|
||||
let left = &self.originals[..range.start];
|
||||
let right = &self.originals[range.end..];
|
||||
|
||||
let common_left = longest_common_prefix(left, &replacement);
|
||||
let common_right = longest_common_prefix(&replacement, right);
|
||||
|
||||
for i in 0..common_left {
|
||||
let range = range.start - common_left + i..range.start - common_left + i + 1;
|
||||
let replacement = vec![replacement[i].clone()];
|
||||
self.mappings.insert(id + i, (range, replacement));
|
||||
}
|
||||
|
||||
{
|
||||
let replacement = replacement[common_left..replacement.len() - common_right].to_vec();
|
||||
self.mappings.insert(id + common_left, (range.clone(), replacement));
|
||||
}
|
||||
|
||||
for i in 0..common_right {
|
||||
let id = id + replacement.len() - common_right + i;
|
||||
let range = range.end + i..range.end + i + 1;
|
||||
let replacement = vec![replacement[replacement.len() - common_right + i].clone()];
|
||||
self.mappings.insert(id, (range, replacement));
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mapping(self) -> HashMap<QueryId, Range<usize>> {
|
||||
let mappings = self.mappings.into_iter().map(|(i, (r, v))| (r, (i, v)));
|
||||
let intervals = IntervalTree::from_iter(mappings);
|
||||
|
||||
let mut output = HashMap::new();
|
||||
let mut offset = 0;
|
||||
|
||||
// We map each original word to the biggest number of
|
||||
// associated words.
|
||||
for i in 0..self.originals.len() {
|
||||
let max = intervals.query_point(i)
|
||||
.filter_map(|e| {
|
||||
if e.range.end - 1 == i {
|
||||
let len = e.value.1.iter().skip(i - e.range.start).count();
|
||||
if len != 0 { Some(len) } else { None }
|
||||
} else { None }
|
||||
})
|
||||
.max()
|
||||
.unwrap_or(1);
|
||||
|
||||
let range = i + offset..i + offset + max;
|
||||
output.insert(i, range);
|
||||
offset += max - 1;
|
||||
}
|
||||
|
||||
// We retrieve the range that each original word
|
||||
// is mapped to and apply it to each of the words.
|
||||
for i in 0..self.originals.len() {
|
||||
|
||||
let iter = intervals.query_point(i).filter(|e| e.range.end - 1 == i);
|
||||
for Element { range, value: (id, words) } in iter {
|
||||
|
||||
// We ask for the complete range mapped to the area we map.
|
||||
let start = output.get(&range.start).map(|r| r.start).unwrap_or(range.start);
|
||||
let end = output.get(&(range.end - 1)).map(|r| r.end).unwrap_or(range.end);
|
||||
let range = start..end;
|
||||
|
||||
// We map each query id to one word until the last,
|
||||
// we map it to the remainings words.
|
||||
let add = range.len() - words.len();
|
||||
for (j, x) in range.take(words.len()).enumerate() {
|
||||
let add = if j == words.len() - 1 { add } else { 0 }; // is last?
|
||||
let range = x..x + 1 + add;
|
||||
output.insert(id + j, range);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the length of the longest suffix of `a` that is also a prefix
/// of `b`, i.e. by how many elements the end of `a` overlaps the start of
/// `b`. Returns `0` when they do not overlap or when either slice is empty.
///
/// Every candidate length is checked, from the longest to the shortest,
/// and the whole suffix must match: an overlap of any length is found, and
/// a match that stops before the end of `a` is not counted.
fn longest_common_prefix<T: Eq + std::fmt::Debug>(a: &[T], b: &[T]) -> usize {
    let longest = a.len().min(b.len());

    (1..=longest)
        .rev()
        .find(|&len| a[a.len() - len..] == b[..len])
        .unwrap_or(0)
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn original_unmodified() {
|
||||
let query = ["new", "york", "city", "subway"];
|
||||
// 0 1 2 3
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// new york = new york city
|
||||
builder.declare(0..2, 4, &["new", "york", "city"]);
|
||||
// ^ 4 5 6
|
||||
|
||||
// new = new york city
|
||||
builder.declare(0..1, 7, &["new", "york", "city"]);
|
||||
// ^ 7 8 9
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..1); // new
|
||||
assert_eq!(mapping[&1], 1..2); // york
|
||||
assert_eq!(mapping[&2], 2..3); // city
|
||||
assert_eq!(mapping[&3], 3..4); // subway
|
||||
|
||||
assert_eq!(mapping[&4], 0..1); // new
|
||||
assert_eq!(mapping[&5], 1..2); // york
|
||||
assert_eq!(mapping[&6], 2..3); // city
|
||||
|
||||
assert_eq!(mapping[&7], 0..1); // new
|
||||
assert_eq!(mapping[&8], 1..2); // york
|
||||
assert_eq!(mapping[&9], 2..3); // city
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn original_unmodified2() {
|
||||
let query = ["new", "york", "city", "subway"];
|
||||
// 0 1 2 3
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// city subway = new york city underground train
|
||||
builder.declare(2..4, 4, &["new", "york", "city", "underground", "train"]);
|
||||
// ^ 4 5 6 7 8
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..1); // new
|
||||
assert_eq!(mapping[&1], 1..2); // york
|
||||
assert_eq!(mapping[&2], 2..3); // city
|
||||
assert_eq!(mapping[&3], 3..5); // subway
|
||||
|
||||
assert_eq!(mapping[&4], 0..1); // new
|
||||
assert_eq!(mapping[&5], 1..2); // york
|
||||
assert_eq!(mapping[&6], 2..3); // city
|
||||
assert_eq!(mapping[&7], 3..4); // underground
|
||||
assert_eq!(mapping[&8], 4..5); // train
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn original_unmodified3() {
|
||||
let query = ["a", "b", "x", "x", "a", "b", "c", "d", "e", "f", "g"];
|
||||
// 0 1 2 3 4 5 6 7 8 9 10
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// c d = a b x c d k j e f
|
||||
builder.declare(6..8, 11, &["a", "b", "x", "c", "d", "k", "j", "e", "f"]);
|
||||
// ^^ 11 12 13 14 15 16 17 18 19
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..1); // a
|
||||
assert_eq!(mapping[&1], 1..2); // b
|
||||
assert_eq!(mapping[&2], 2..3); // x
|
||||
assert_eq!(mapping[&3], 3..4); // x
|
||||
assert_eq!(mapping[&4], 4..5); // a
|
||||
assert_eq!(mapping[&5], 5..6); // b
|
||||
assert_eq!(mapping[&6], 6..7); // c
|
||||
assert_eq!(mapping[&7], 7..11); // d
|
||||
assert_eq!(mapping[&8], 11..12); // e
|
||||
assert_eq!(mapping[&9], 12..13); // f
|
||||
assert_eq!(mapping[&10], 13..14); // g
|
||||
|
||||
assert_eq!(mapping[&11], 4..5); // a
|
||||
assert_eq!(mapping[&12], 5..6); // b
|
||||
assert_eq!(mapping[&13], 6..7); // x
|
||||
assert_eq!(mapping[&14], 7..8); // c
|
||||
assert_eq!(mapping[&15], 8..9); // d
|
||||
assert_eq!(mapping[&16], 9..10); // k
|
||||
assert_eq!(mapping[&17], 10..11); // j
|
||||
assert_eq!(mapping[&18], 11..12); // e
|
||||
assert_eq!(mapping[&19], 12..13); // f
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_growing() {
|
||||
let query = ["new", "york", "subway"];
|
||||
// 0 1 2
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// new york = new york city
|
||||
builder.declare(0..2, 3, &["new", "york", "city"]);
|
||||
// ^ 3 4 5
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..1); // new
|
||||
assert_eq!(mapping[&1], 1..3); // york
|
||||
assert_eq!(mapping[&2], 3..4); // subway
|
||||
assert_eq!(mapping[&3], 0..1); // new
|
||||
assert_eq!(mapping[&4], 1..2); // york
|
||||
assert_eq!(mapping[&5], 2..3); // city
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn same_place_growings() {
|
||||
let query = ["NY", "subway"];
|
||||
// 0 1
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// NY = new york
|
||||
builder.declare(0..1, 2, &["new", "york"]);
|
||||
// ^ 2 3
|
||||
|
||||
// NY = new york city
|
||||
builder.declare(0..1, 4, &["new", "york", "city"]);
|
||||
// ^ 4 5 6
|
||||
|
||||
// NY = NYC
|
||||
builder.declare(0..1, 7, &["NYC"]);
|
||||
// ^ 7
|
||||
|
||||
// NY = new york city
|
||||
builder.declare(0..1, 8, &["new", "york", "city"]);
|
||||
// ^ 8 9 10
|
||||
|
||||
// subway = underground train
|
||||
builder.declare(1..2, 11, &["underground", "train"]);
|
||||
// ^ 11 12
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..3); // NY
|
||||
assert_eq!(mapping[&1], 3..5); // subway
|
||||
assert_eq!(mapping[&2], 0..1); // new
|
||||
assert_eq!(mapping[&3], 1..3); // york
|
||||
assert_eq!(mapping[&4], 0..1); // new
|
||||
assert_eq!(mapping[&5], 1..2); // york
|
||||
assert_eq!(mapping[&6], 2..3); // city
|
||||
assert_eq!(mapping[&7], 0..3); // NYC
|
||||
assert_eq!(mapping[&8], 0..1); // new
|
||||
assert_eq!(mapping[&9], 1..2); // york
|
||||
assert_eq!(mapping[&10], 2..3); // city
|
||||
assert_eq!(mapping[&11], 3..4); // underground
|
||||
assert_eq!(mapping[&12], 4..5); // train
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bigger_growing() {
|
||||
let query = ["NYC", "subway"];
|
||||
// 0 1
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// NYC = new york city
|
||||
builder.declare(0..1, 2, &["new", "york", "city"]);
|
||||
// ^ 2 3 4
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..3); // NYC
|
||||
assert_eq!(mapping[&1], 3..4); // subway
|
||||
assert_eq!(mapping[&2], 0..1); // new
|
||||
assert_eq!(mapping[&3], 1..2); // york
|
||||
assert_eq!(mapping[&4], 2..3); // city
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn middle_query_growing() {
|
||||
let query = ["great", "awesome", "NYC", "subway"];
|
||||
// 0 1 2 3
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// NYC = new york city
|
||||
builder.declare(2..3, 4, &["new", "york", "city"]);
|
||||
// ^ 4 5 6
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..1); // great
|
||||
assert_eq!(mapping[&1], 1..2); // awesome
|
||||
assert_eq!(mapping[&2], 2..5); // NYC
|
||||
assert_eq!(mapping[&3], 5..6); // subway
|
||||
assert_eq!(mapping[&4], 2..3); // new
|
||||
assert_eq!(mapping[&5], 3..4); // york
|
||||
assert_eq!(mapping[&6], 4..5); // city
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn end_query_growing() {
|
||||
let query = ["NYC", "subway"];
|
||||
// 0 1
|
||||
let mut builder = QueryWordsMapper::new(&query);
|
||||
|
||||
// subway = underground train
|
||||
builder.declare(1..2, 2, &["underground", "train"]);
|
||||
// ^ 2 3
|
||||
|
||||
let mapping = builder.mapping();
|
||||
|
||||
assert_eq!(mapping[&0], 0..1); // NYC
|
||||
assert_eq!(mapping[&1], 1..3); // subway
|
||||
assert_eq!(mapping[&2], 1..2); // underground
|
||||
assert_eq!(mapping[&3], 2..3); // train
|
||||
}
|
||||
|
||||
#[test]
fn multiple_growings() {
    // Query word ids: great = 0, awesome = 1, NYC = 2, subway = 3.
    let query = ["great", "awesome", "NYC", "subway"];
    let mut mapper = QueryWordsMapper::new(&query);

    // NYC = new york city; the replacement words take the ids 4, 5 and 6.
    mapper.declare(2..3, 4, &["new", "york", "city"]);

    // subway = underground train; the replacement words take the ids 7 and 8.
    mapper.declare(3..4, 7, &["underground", "train"]);

    let ranges = mapper.mapping();

    assert_eq!(ranges[&0], 0..1); // great
    assert_eq!(ranges[&1], 1..2); // awesome
    assert_eq!(ranges[&2], 2..5); // NYC
    assert_eq!(ranges[&3], 5..7); // subway
    assert_eq!(ranges[&4], 2..3); // new
    assert_eq!(ranges[&5], 3..4); // york
    assert_eq!(ranges[&6], 4..5); // city
    assert_eq!(ranges[&7], 5..6); // underground
    assert_eq!(ranges[&8], 6..7); // train
}
|
||||
|
||||
#[test]
fn multiple_probable_growings() {
    // Query word ids: great = 0, awesome = 1, NYC = 2, subway = 3.
    let query = ["great", "awesome", "NYC", "subway"];
    let mut mapper = QueryWordsMapper::new(&query);

    // NYC = new york city; the replacement words take the ids 4, 5 and 6.
    mapper.declare(2..3, 4, &["new", "york", "city"]);

    // subway = underground train; the replacement words take the ids 7 and 8.
    mapper.declare(3..4, 7, &["underground", "train"]);

    // great awesome = good; the replacement word takes the id 9.
    mapper.declare(0..2, 9, &["good"]);

    // awesome NYC = NY; the replacement word takes the id 10.
    mapper.declare(1..3, 10, &["NY"]);

    // NYC subway = metro; the replacement word takes the id 11.
    mapper.declare(2..4, 11, &["metro"]);

    let ranges = mapper.mapping();

    assert_eq!(ranges[&0], 0..1); // great
    assert_eq!(ranges[&1], 1..2); // awesome
    assert_eq!(ranges[&2], 2..5); // NYC
    assert_eq!(ranges[&3], 5..7); // subway
    assert_eq!(ranges[&4], 2..3); // new
    assert_eq!(ranges[&5], 3..4); // york
    assert_eq!(ranges[&6], 4..5); // city
    assert_eq!(ranges[&7], 5..6); // underground
    assert_eq!(ranges[&8], 6..7); // train
    assert_eq!(ranges[&9], 0..2); // good
    assert_eq!(ranges[&10], 1..5); // NY
    assert_eq!(ranges[&11], 2..7); // metro
}
|
||||
}
|
@ -1,41 +0,0 @@
|
||||
use std::io::{Read, Write};
|
||||
|
||||
use hashbrown::HashMap;
|
||||
use meilisearch_schema::FieldId;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{DocumentId, Number};
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(transparent)]
|
||||
pub struct RankedMap(HashMap<(DocumentId, FieldId), Number>);
|
||||
|
||||
impl RankedMap {
|
||||
pub fn len(&self) -> usize {
|
||||
self.0.len()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.0.is_empty()
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, document: DocumentId, field: FieldId, number: Number) {
|
||||
self.0.insert((document, field), number);
|
||||
}
|
||||
|
||||
pub fn remove(&mut self, document: DocumentId, field: FieldId) {
|
||||
self.0.remove(&(document, field));
|
||||
}
|
||||
|
||||
pub fn get(&self, document: DocumentId, field: FieldId) -> Option<Number> {
|
||||
self.0.get(&(document, field)).cloned()
|
||||
}
|
||||
|
||||
pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<RankedMap> {
|
||||
bincode::deserialize_from(reader).map(RankedMap)
|
||||
}
|
||||
|
||||
pub fn write_to_bin<W: Write>(&self, writer: W) -> bincode::Result<()> {
|
||||
bincode::serialize_into(writer, &self.0)
|
||||
}
|
||||
}
|
@ -1,51 +0,0 @@
|
||||
use compact_arena::SmallArena;
|
||||
use sdset::SetBuf;
|
||||
use crate::DocIndex;
|
||||
use crate::bucket_sort::{SimpleMatch, BareMatch, PostingsListView};
|
||||
use crate::reordered_attrs::ReorderedAttrs;
|
||||
|
||||
pub struct RawDocument<'a, 'tag> {
|
||||
pub id: crate::DocumentId,
|
||||
pub bare_matches: &'a mut [BareMatch<'tag>],
|
||||
pub processed_matches: Vec<SimpleMatch>,
|
||||
/// The list of minimum `distance` found
|
||||
pub processed_distances: Vec<Option<u8>>,
|
||||
/// Does this document contains a field
|
||||
/// with one word that is exactly matching
|
||||
pub contains_one_word_field: bool,
|
||||
}
|
||||
|
||||
impl<'a, 'tag> RawDocument<'a, 'tag> {
|
||||
pub fn new<'txn>(
|
||||
bare_matches: &'a mut [BareMatch<'tag>],
|
||||
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
searchable_attrs: Option<&ReorderedAttrs>,
|
||||
) -> RawDocument<'a, 'tag>
|
||||
{
|
||||
if let Some(reordered_attrs) = searchable_attrs {
|
||||
for bm in bare_matches.iter() {
|
||||
let postings_list = &postings_lists[bm.postings_list];
|
||||
|
||||
let mut rewritten = Vec::new();
|
||||
for di in postings_list.iter() {
|
||||
if let Some(attribute) = reordered_attrs.get(di.attribute) {
|
||||
rewritten.push(DocIndex { attribute, ..*di });
|
||||
}
|
||||
}
|
||||
|
||||
let new_postings = SetBuf::from_dirty(rewritten);
|
||||
postings_lists[bm.postings_list].rewrite_with(new_postings);
|
||||
}
|
||||
}
|
||||
|
||||
bare_matches.sort_unstable_by_key(|m| m.query_index);
|
||||
|
||||
RawDocument {
|
||||
id: bare_matches[0].document_id,
|
||||
bare_matches,
|
||||
processed_matches: Vec::new(),
|
||||
processed_distances: Vec::new(),
|
||||
contains_one_word_field: false,
|
||||
}
|
||||
}
|
||||
}
|
@ -1,344 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::convert::TryFrom;
|
||||
|
||||
use meilisearch_schema::IndexedPos;
|
||||
use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig};
|
||||
use meilisearch_tokenizer::{Token, token::SeparatorKind, TokenKind};
|
||||
use sdset::SetBuf;
|
||||
|
||||
use crate::{DocIndex, DocumentId};
|
||||
use crate::FstSetCow;
|
||||
|
||||
/// Maximum length, in bytes, of a word that `index_token` accepts to store.
const WORD_LENGTH_LIMIT: usize = 80;

/// A word, stored as its raw bytes.
type Word = Vec<u8>; // TODO make it be a SmallVec
|
||||
|
||||
/// Accumulates the words found in document texts until `build` turns them
/// into an `Indexed` value.
pub struct RawIndexer<'a, A> {
    word_limit: usize, // the maximum number of indexed words
    // Every `DocIndex` recorded so far, grouped by word.
    words_doc_indexes: BTreeMap<Word, Vec<DocIndex>>,
    // Every word recorded so far, grouped by document (duplicates are removed by `build`).
    docs_words: HashMap<DocumentId, Vec<Word>>,
    // Analyzer created from the stop words given to the constructor.
    analyzer: Analyzer<'a, A>,
}

/// The output of `RawIndexer::build`.
pub struct Indexed<'a> {
    /// For each word, the set of `DocIndex` where it was seen.
    pub words_doc_indexes: BTreeMap<Word, SetBuf<DocIndex>>,
    /// For each document, the set of words it contains.
    pub docs_words: HashMap<DocumentId, FstSetCow<'a>>,
}
|
||||
|
||||
impl<'a, A> RawIndexer<'a, A>
|
||||
where
|
||||
A: AsRef<[u8]>
|
||||
{
|
||||
pub fn new(stop_words: &'a fst::Set<A>) -> RawIndexer<'a, A> {
|
||||
RawIndexer::with_word_limit(stop_words, 1000)
|
||||
}
|
||||
|
||||
pub fn with_word_limit(stop_words: &'a fst::Set<A>, limit: usize) -> RawIndexer<A> {
|
||||
RawIndexer {
|
||||
word_limit: limit,
|
||||
words_doc_indexes: BTreeMap::new(),
|
||||
docs_words: HashMap::new(),
|
||||
analyzer: Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn index_text(&mut self, id: DocumentId, indexed_pos: IndexedPos, text: &str) -> usize {
|
||||
let mut number_of_words = 0;
|
||||
|
||||
let analyzed_text = self.analyzer.analyze(text);
|
||||
for (token_pos, (word_pos, token)) in process_tokens(analyzed_text.tokens()).enumerate() {
|
||||
let must_continue = index_token(
|
||||
token,
|
||||
word_pos,
|
||||
token_pos,
|
||||
id,
|
||||
indexed_pos,
|
||||
self.word_limit,
|
||||
&mut self.words_doc_indexes,
|
||||
&mut self.docs_words,
|
||||
);
|
||||
|
||||
number_of_words += 1;
|
||||
|
||||
if !must_continue {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
number_of_words
|
||||
}
|
||||
|
||||
pub fn index_text_seq<'s, I>(&mut self, id: DocumentId, indexed_pos: IndexedPos, text_iter: I)
|
||||
where
|
||||
I: IntoIterator<Item = &'s str>,
|
||||
{
|
||||
let mut word_offset = 0;
|
||||
|
||||
for text in text_iter.into_iter() {
|
||||
let current_word_offset = word_offset;
|
||||
|
||||
let analyzed_text = self.analyzer.analyze(text);
|
||||
let tokens = process_tokens(analyzed_text.tokens())
|
||||
.map(|(i, t)| (i + current_word_offset, t))
|
||||
.enumerate();
|
||||
|
||||
for (token_pos, (word_pos, token)) in tokens {
|
||||
word_offset = word_pos + 1;
|
||||
|
||||
let must_continue = index_token(
|
||||
token,
|
||||
word_pos,
|
||||
token_pos,
|
||||
id,
|
||||
indexed_pos,
|
||||
self.word_limit,
|
||||
&mut self.words_doc_indexes,
|
||||
&mut self.docs_words,
|
||||
);
|
||||
|
||||
if !must_continue {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build(self) -> Indexed<'static> {
|
||||
let words_doc_indexes = self
|
||||
.words_doc_indexes
|
||||
.into_iter()
|
||||
.map(|(word, indexes)| (word, SetBuf::from_dirty(indexes)))
|
||||
.collect();
|
||||
|
||||
let docs_words = self
|
||||
.docs_words
|
||||
.into_iter()
|
||||
.map(|(id, mut words)| {
|
||||
words.sort_unstable();
|
||||
words.dedup();
|
||||
let fst = fst::Set::from_iter(words).unwrap().map_data(Cow::Owned).unwrap();
|
||||
(id, fst)
|
||||
})
|
||||
.collect();
|
||||
|
||||
Indexed {
|
||||
words_doc_indexes,
|
||||
docs_words,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn process_tokens<'a>(tokens: impl Iterator<Item = Token<'a>>) -> impl Iterator<Item = (usize, Token<'a>)> {
|
||||
tokens
|
||||
.skip_while(|token| !token.is_word())
|
||||
.scan((0, None), |(offset, prev_kind), token| {
|
||||
match token.kind {
|
||||
TokenKind::Word | TokenKind::StopWord | TokenKind::Unknown => {
|
||||
*offset += match *prev_kind {
|
||||
Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
|
||||
Some(_) => 1,
|
||||
None => 0,
|
||||
};
|
||||
*prev_kind = Some(token.kind)
|
||||
}
|
||||
TokenKind::Separator(SeparatorKind::Hard) => {
|
||||
*prev_kind = Some(token.kind);
|
||||
}
|
||||
TokenKind::Separator(SeparatorKind::Soft)
|
||||
if *prev_kind != Some(TokenKind::Separator(SeparatorKind::Hard)) => {
|
||||
*prev_kind = Some(token.kind);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
Some((*offset, token))
|
||||
})
|
||||
.filter(|(_, t)| t.is_word())
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn index_token(
|
||||
token: Token,
|
||||
word_pos: usize,
|
||||
token_pos: usize,
|
||||
id: DocumentId,
|
||||
indexed_pos: IndexedPos,
|
||||
word_limit: usize,
|
||||
words_doc_indexes: &mut BTreeMap<Word, Vec<DocIndex>>,
|
||||
docs_words: &mut HashMap<DocumentId, Vec<Word>>,
|
||||
) -> bool
|
||||
{
|
||||
if token_pos >= word_limit {
|
||||
return false;
|
||||
}
|
||||
|
||||
if !token.is_stopword() {
|
||||
match token_to_docindex(id, indexed_pos, &token, word_pos) {
|
||||
Some(docindex) => {
|
||||
let word = Vec::from(token.word.as_ref());
|
||||
|
||||
if word.len() <= WORD_LENGTH_LIMIT {
|
||||
words_doc_indexes
|
||||
.entry(word.clone())
|
||||
.or_insert_with(Vec::new)
|
||||
.push(docindex);
|
||||
docs_words.entry(id).or_insert_with(Vec::new).push(word);
|
||||
}
|
||||
}
|
||||
None => return false,
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
fn token_to_docindex(id: DocumentId, indexed_pos: IndexedPos, token: &Token, word_index: usize) -> Option<DocIndex> {
|
||||
let word_index = u16::try_from(word_index).ok()?;
|
||||
let char_index = u16::try_from(token.byte_start).ok()?;
|
||||
let char_length = u16::try_from(token.word.len()).ok()?;
|
||||
|
||||
let docindex = DocIndex {
|
||||
document_id: id,
|
||||
attribute: indexed_pos.0,
|
||||
word_index,
|
||||
char_index,
|
||||
char_length,
|
||||
};
|
||||
|
||||
Some(docindex)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use meilisearch_schema::IndexedPos;
|
||||
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
|
||||
use fst::Set;
|
||||
|
||||
#[test]
fn test_process_token() {
    // Only the text of the produced word tokens is compared with the expected list.
    let text = " 為一包含一千多萬目詞的帶標記平衡語料庫";
    let stopwords = Set::default();
    let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stopwords));
    let analyzed = analyzer.analyze(text);

    let words: Vec<String> = process_tokens(analyzed.tokens())
        .map(|(_, token)| token.text().to_string())
        .collect();

    assert_eq!(words, ["为", "一", "包含", "一千多万", "目词", "的", "带", "标记", "平衡", "语料库"]);
}
|
||||
|
||||
#[test]
fn strange_apostrophe() {
    // The words around the typographic apostrophes must be indexed separately.
    let stop_words = fst::Set::default();
    let mut indexer = RawIndexer::new(&stop_words);

    let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !";
    indexer.index_text(DocumentId(0), IndexedPos(0), text);

    let postings = indexer.build().words_doc_indexes;

    assert!(postings.contains_key(&b"l"[..]));
    assert!(postings.contains_key(&b"aspirateur"[..]));
    assert!(postings.contains_key(&b"ai"[..]));
    assert!(postings.contains_key(&b"eteindre"[..]));
}
|
||||
|
||||
#[test]
fn strange_apostrophe_in_sequence() {
    // Same text as `strange_apostrophe`, given as a one-element sequence.
    let stop_words = fst::Set::default();
    let mut indexer = RawIndexer::new(&stop_words);

    let texts = vec!["Zut, l’aspirateur, j’ai oublié de l’éteindre !"];
    indexer.index_text_seq(DocumentId(0), IndexedPos(0), texts);

    let postings = indexer.build().words_doc_indexes;

    assert!(postings.contains_key(&b"l"[..]));
    assert!(postings.contains_key(&b"aspirateur"[..]));
    assert!(postings.contains_key(&b"ai"[..]));
    assert!(postings.contains_key(&b"eteindre"[..]));
}
|
||||
|
||||
#[test]
fn basic_stop_words() {
    // The declared stop words must not be indexed, the other words must be.
    let stop_words = sdset::SetBuf::from_dirty(vec!["l", "j", "ai", "de"]);
    let stop_words = fst::Set::from_iter(stop_words).unwrap();

    let mut indexer = RawIndexer::new(&stop_words);

    let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !";
    indexer.index_text(DocumentId(0), IndexedPos(0), text);

    let postings = indexer.build().words_doc_indexes;

    assert!(!postings.contains_key(&b"l"[..]));
    assert!(postings.contains_key(&b"aspirateur"[..]));
    assert!(!postings.contains_key(&b"j"[..]));
    assert!(!postings.contains_key(&b"ai"[..]));
    assert!(!postings.contains_key(&b"de"[..]));
    assert!(postings.contains_key(&b"eteindre"[..]));
}
|
||||
|
||||
#[test]
fn no_empty_unidecode() {
    // The flag must be indexed under its own bytes.
    let stop_words = fst::Set::default();
    let mut indexer = RawIndexer::new(&stop_words);

    let text = "🇯🇵";
    indexer.index_text(DocumentId(0), IndexedPos(0), text);

    let postings = indexer.build().words_doc_indexes;

    assert!(postings.contains_key("🇯🇵".as_bytes()));
}
|
||||
|
||||
#[test]
|
||||
// test sample from 807
|
||||
fn very_long_text() {
|
||||
let stop_words = fst::Set::default();
|
||||
let mut indexer = RawIndexer::new(&stop_words);
|
||||
let indexed_pos = IndexedPos(0);
|
||||
let docid = DocumentId(0);
|
||||
let text = " The locations block is the most powerful, and potentially most involved, section of the .platform.app.yaml file. It allows you to control how the application container responds to incoming requests at a very fine-grained level. Common patterns also vary between language containers due to the way PHP-FPM handles incoming requests.\nEach entry of the locations block is an absolute URI path (with leading /) and its value includes the configuration directives for how the web server should handle matching requests. That is, if your domain is example.com then '/' means “requests for example.com/”, while '/admin' means “requests for example.com/admin”. If multiple blocks could match an incoming request then the most-specific will apply.\nweb:locations:'/':# Rules for all requests that don't otherwise match....'/sites/default/files':# Rules for any requests that begin with /sites/default/files....The simplest possible locations configuration is one that simply passes all requests on to your application unconditionally:\nweb:locations:'/':passthru:trueThat is, all requests to /* should be forwarded to the process started by web.commands.start above. Note that for PHP containers the passthru key must specify what PHP file the request should be forwarded to, and must also specify a docroot under which the file lives. For example:\nweb:locations:'/':root:'web'passthru:'/app.php'This block will serve requests to / from the web directory in the application, and if a file doesn’t exist on disk then the request will be forwarded to the /app.php script.\nA full list of the possible subkeys for locations is below.\n root: The folder from which to serve static assets for this location relative to the application root. The application root is the directory in which the .platform.app.yaml file is located. Typical values for this property include public or web. Setting it to '' is not recommended, and its behavior may vary depending on the type of application. 
Absolute paths are not supported.\n passthru: Whether to forward disallowed and missing resources from this location to the application and can be true, false or an absolute URI path (with leading /). The default value is false. For non-PHP applications it will generally be just true or false. In a PHP application this will typically be the front controller such as /index.php or /app.php. This entry works similar to mod_rewrite under Apache. Note: If the value of passthru does not begin with the same value as the location key it is under, the passthru may evaluate to another entry. That may be useful when you want different cache settings for different paths, for instance, but want missing files in all of them to map back to the same front controller. See the example block below.\n index: The files to consider when serving a request for a directory: an array of file names or null. (typically ['index.html']). Note that in order for this to work, access to the static files named must be allowed by the allow or rules keys for this location.\n expires: How long to allow static assets from this location to be cached (this enables the Cache-Control and Expires headers) and can be a time or -1 for no caching (default). Times can be suffixed with “ms” (milliseconds), “s” (seconds), “m” (minutes), “h” (hours), “d” (days), “w” (weeks), “M” (months, 30d) or “y” (years, 365d).\n scripts: Whether to allow loading scripts in that location (true or false). This directive is only meaningful on PHP.\n allow: Whether to allow serving files which don’t match a rule (true or false, default: true).\n headers: Any additional headers to apply to static assets. This section is a mapping of header names to header values. Responses from the application aren’t affected, to avoid overlap with the application’s own ability to include custom headers in the response.\n rules: Specific overrides for a specific location. 
The key is a PCRE (regular expression) that is matched against the full request path.\n request_buffering: Most application servers do not support chunked requests (e.g. fpm, uwsgi), so Platform.sh enables request_buffering by default to handle them. That default configuration would look like this if it was present in .platform.app.yaml:\nweb:locations:'/':passthru:truerequest_buffering:enabled:truemax_request_size:250mIf the application server can already efficiently handle chunked requests, the request_buffering subkey can be modified to disable it entirely (enabled: false). Additionally, applications that frequently deal with uploads greater than 250MB in size can update the max_request_size key to the application’s needs. Note that modifications to request_buffering will need to be specified at each location where it is desired.\n ";
|
||||
indexer.index_text(docid, indexed_pos, text);
|
||||
let Indexed {
|
||||
words_doc_indexes, ..
|
||||
} = indexer.build();
|
||||
assert!(words_doc_indexes.get(&"request".to_owned().into_bytes()).is_some());
|
||||
}
|
||||
|
||||
#[test]
fn words_over_index_1000_not_indexed() {
    // 1000 times "less", then "more" as the 1001st word.
    let stop_words = fst::Set::default();
    let mut indexer = RawIndexer::new(&stop_words);

    let mut text = "less ".repeat(1000);
    text.push_str("more");
    indexer.index_text(DocumentId(0), IndexedPos(0), &text);

    let postings = indexer.build().words_doc_indexes;

    assert!(postings.contains_key("less".as_bytes()));
    assert!(!postings.contains_key("more".as_bytes()));
}
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
use std::cmp;
|
||||
|
||||
/// Remembers the order in which attributes were inserted.
///
/// `get` maps an attribute to its insertion position and `reverse` maps an
/// insertion position back to the attribute.
#[derive(Default, Clone)]
pub struct ReorderedAttrs {
    // Indexed by attribute: the position given to it, `None` if never inserted.
    reorders: Vec<Option<u16>>,
    // Indexed by position: the attribute inserted at that position.
    reverse: Vec<u16>,
}

impl ReorderedAttrs {
    /// Creates an empty mapping.
    pub fn new() -> ReorderedAttrs {
        Self::default()
    }

    /// Gives `attribute` the next free position.
    ///
    /// Inserting an attribute again gives it a new position; the previous
    /// position still reverses to that attribute.
    pub fn insert_attribute(&mut self, attribute: u16) {
        let slot = usize::from(attribute);
        let position = self.reverse.len() as u16;

        // Grow the table when the attribute is beyond its end, never shrink it.
        let needed_len = cmp::max(slot + 1, self.reorders.len());
        self.reorders.resize(needed_len, None);

        self.reorders[slot] = Some(position);
        self.reverse.push(attribute);
    }

    /// Returns the position given to `attribute`, if it was inserted.
    pub fn get(&self, attribute: u16) -> Option<u16> {
        self.reorders
            .get(usize::from(attribute))
            .and_then(|position| *position)
    }

    /// Returns the attribute that was inserted at the given position.
    pub fn reverse(&self, attribute: u16) -> Option<u16> {
        let position = usize::from(attribute);
        self.reverse.get(position).copied()
    }
}
|
@ -1,161 +0,0 @@
|
||||
use std::collections::HashSet;
|
||||
use std::io::Cursor;
|
||||
use std::{error::Error, fmt};
|
||||
|
||||
use meilisearch_schema::{Schema, FieldId};
|
||||
use serde::{de, forward_to_deserialize_any};
|
||||
use serde_json::de::IoRead as SerdeJsonIoRead;
|
||||
use serde_json::Deserializer as SerdeJsonDeserializer;
|
||||
use serde_json::Error as SerdeJsonError;
|
||||
|
||||
use crate::database::MainT;
|
||||
use crate::store::DocumentsFields;
|
||||
use crate::DocumentId;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum DeserializerError {
|
||||
SerdeJson(SerdeJsonError),
|
||||
Zlmdb(heed::Error),
|
||||
Custom(String),
|
||||
}
|
||||
|
||||
impl de::Error for DeserializerError {
|
||||
fn custom<T: fmt::Display>(msg: T) -> Self {
|
||||
DeserializerError::Custom(msg.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for DeserializerError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
DeserializerError::SerdeJson(e) => write!(f, "serde json related error: {}", e),
|
||||
DeserializerError::Zlmdb(e) => write!(f, "heed related error: {}", e),
|
||||
DeserializerError::Custom(s) => f.write_str(s),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for DeserializerError {}
|
||||
|
||||
impl From<SerdeJsonError> for DeserializerError {
|
||||
fn from(error: SerdeJsonError) -> DeserializerError {
|
||||
DeserializerError::SerdeJson(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<heed::Error> for DeserializerError {
|
||||
fn from(error: heed::Error) -> DeserializerError {
|
||||
DeserializerError::Zlmdb(error)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Deserializer<'a> {
|
||||
pub document_id: DocumentId,
|
||||
pub reader: &'a heed::RoTxn<'a, MainT>,
|
||||
pub documents_fields: DocumentsFields,
|
||||
pub schema: &'a Schema,
|
||||
pub fields: Option<&'a HashSet<FieldId>>,
|
||||
}
|
||||
|
||||
impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
|
||||
type Error = DeserializerError;
|
||||
|
||||
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
||||
where
|
||||
V: de::Visitor<'de>,
|
||||
{
|
||||
self.deserialize_option(visitor)
|
||||
}
|
||||
|
||||
fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
||||
where
|
||||
V: de::Visitor<'de>,
|
||||
{
|
||||
self.deserialize_map(visitor)
|
||||
}
|
||||
|
||||
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
||||
where
|
||||
V: de::Visitor<'de>,
|
||||
{
|
||||
let mut error = None;
|
||||
|
||||
let iter = self
|
||||
.documents_fields
|
||||
.document_fields(self.reader, self.document_id)?
|
||||
.filter_map(|result| {
|
||||
let (attr, value) = match result {
|
||||
Ok(value) => value,
|
||||
Err(e) => {
|
||||
error = Some(e);
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
let is_displayed = self.schema.is_displayed(attr);
|
||||
if is_displayed && self.fields.map_or(true, |f| f.contains(&attr)) {
|
||||
if let Some(attribute_name) = self.schema.name(attr) {
|
||||
let cursor = Cursor::new(value.to_owned());
|
||||
let ioread = SerdeJsonIoRead::new(cursor);
|
||||
let value = Value(SerdeJsonDeserializer::new(ioread));
|
||||
|
||||
Some((attribute_name, value))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
let mut iter = iter.peekable();
|
||||
|
||||
let result = match iter.peek() {
|
||||
Some(_) => {
|
||||
let map_deserializer = de::value::MapDeserializer::new(iter);
|
||||
visitor
|
||||
.visit_some(map_deserializer)
|
||||
.map_err(DeserializerError::from)
|
||||
}
|
||||
None => visitor.visit_none(),
|
||||
};
|
||||
|
||||
match error.take() {
|
||||
Some(error) => Err(error.into()),
|
||||
None => result,
|
||||
}
|
||||
}
|
||||
|
||||
forward_to_deserialize_any! {
|
||||
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
|
||||
bytes byte_buf unit unit_struct newtype_struct seq tuple
|
||||
tuple_struct struct enum identifier ignored_any
|
||||
}
|
||||
}
|
||||
|
||||
struct Value(SerdeJsonDeserializer<SerdeJsonIoRead<Cursor<Vec<u8>>>>);
|
||||
|
||||
impl<'de> de::IntoDeserializer<'de, SerdeJsonError> for Value {
|
||||
type Deserializer = Self;
|
||||
|
||||
fn into_deserializer(self) -> Self::Deserializer {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> de::Deserializer<'de> for Value {
|
||||
type Error = SerdeJsonError;
|
||||
|
||||
fn deserialize_any<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
|
||||
where
|
||||
V: de::Visitor<'de>,
|
||||
{
|
||||
self.0.deserialize_any(visitor)
|
||||
}
|
||||
|
||||
forward_to_deserialize_any! {
|
||||
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
|
||||
bytes byte_buf option unit unit_struct newtype_struct seq tuple
|
||||
tuple_struct map struct enum identifier ignored_any
|
||||
}
|
||||
}
|
@ -1,92 +0,0 @@
|
||||
mod deserializer;
|
||||
|
||||
pub use self::deserializer::{Deserializer, DeserializerError};
|
||||
|
||||
use std::{error::Error, fmt};
|
||||
|
||||
use serde::ser;
|
||||
use serde_json::Error as SerdeJsonError;
|
||||
use meilisearch_schema::Error as SchemaError;
|
||||
|
||||
use crate::ParseNumberError;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum SerializerError {
|
||||
DocumentIdNotFound,
|
||||
InvalidDocumentIdFormat,
|
||||
Zlmdb(heed::Error),
|
||||
SerdeJson(SerdeJsonError),
|
||||
ParseNumber(ParseNumberError),
|
||||
Schema(SchemaError),
|
||||
UnserializableType { type_name: &'static str },
|
||||
UnindexableType { type_name: &'static str },
|
||||
UnrankableType { type_name: &'static str },
|
||||
Custom(String),
|
||||
}
|
||||
|
||||
impl ser::Error for SerializerError {
|
||||
fn custom<T: fmt::Display>(msg: T) -> Self {
|
||||
SerializerError::Custom(msg.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for SerializerError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
SerializerError::DocumentIdNotFound => {
|
||||
f.write_str("Primary key is missing.")
|
||||
}
|
||||
SerializerError::InvalidDocumentIdFormat => {
|
||||
f.write_str("a document primary key can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_).")
|
||||
}
|
||||
SerializerError::Zlmdb(e) => write!(f, "heed related error: {}", e),
|
||||
SerializerError::SerdeJson(e) => write!(f, "serde json error: {}", e),
|
||||
SerializerError::ParseNumber(e) => {
|
||||
write!(f, "error while trying to parse a number: {}", e)
|
||||
}
|
||||
SerializerError::Schema(e) => write!(f, "impossible to update schema: {}", e),
|
||||
SerializerError::UnserializableType { type_name } => {
|
||||
write!(f, "{} is not a serializable type", type_name)
|
||||
}
|
||||
SerializerError::UnindexableType { type_name } => {
|
||||
write!(f, "{} is not an indexable type", type_name)
|
||||
}
|
||||
SerializerError::UnrankableType { type_name } => {
|
||||
write!(f, "{} types can not be used for ranking", type_name)
|
||||
}
|
||||
SerializerError::Custom(s) => f.write_str(s),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for SerializerError {}
|
||||
|
||||
impl From<String> for SerializerError {
|
||||
fn from(value: String) -> SerializerError {
|
||||
SerializerError::Custom(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SerdeJsonError> for SerializerError {
|
||||
fn from(error: SerdeJsonError) -> SerializerError {
|
||||
SerializerError::SerdeJson(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<heed::Error> for SerializerError {
|
||||
fn from(error: heed::Error) -> SerializerError {
|
||||
SerializerError::Zlmdb(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ParseNumberError> for SerializerError {
|
||||
fn from(error: ParseNumberError) -> SerializerError {
|
||||
SerializerError::ParseNumber(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SchemaError> for SerializerError {
|
||||
fn from(error: SchemaError) -> SerializerError {
|
||||
SerializerError::Schema(error)
|
||||
}
|
||||
}
|
@ -1,183 +0,0 @@
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::str::FromStr;
|
||||
use std::iter::IntoIterator;
|
||||
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use self::RankingRule::*;
|
||||
|
||||
/// The six default ranking rules, in the order they are listed.
pub const DEFAULT_RANKING_RULES: [RankingRule; 6] = [Typo, Words, Proximity, Attribute, WordsPosition, Exactness];

/// Matches `asc(<name>)` or `desc(<name>)`: the first group captures the
/// direction and the second one the name, which may be empty and is made of
/// characters of the class `[a-zA-Z0-9-_]`. The pattern is not anchored.
static RANKING_RULE_REGEX: Lazy<regex::Regex> = Lazy::new(|| {
    regex::Regex::new(r"(asc|desc)\(([a-zA-Z0-9-_]*)\)").unwrap()
});
|
||||
|
||||
/// The settings as they are received, with camelCase field names; unknown
/// fields are rejected.
///
/// Every field uses two levels of `Option`: the outer one is `None` when the
/// field is absent, the inner one is `None` when the field is present but
/// null (see `deserialize_some`).
#[derive(Default, Clone, Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct Settings {
    /// The ranking rules, in their textual form.
    #[serde(default, deserialize_with = "deserialize_some")]
    pub ranking_rules: Option<Option<Vec<String>>>,
    #[serde(default, deserialize_with = "deserialize_some")]
    pub distinct_attribute: Option<Option<String>>,
    #[serde(default, deserialize_with = "deserialize_some")]
    pub searchable_attributes: Option<Option<Vec<String>>>,
    #[serde(default, deserialize_with = "deserialize_some")]
    pub displayed_attributes: Option<Option<BTreeSet<String>>>,
    #[serde(default, deserialize_with = "deserialize_some")]
    pub stop_words: Option<Option<BTreeSet<String>>>,
    #[serde(default, deserialize_with = "deserialize_some")]
    pub synonyms: Option<Option<BTreeMap<String, Vec<String>>>>,
    #[serde(default, deserialize_with = "deserialize_some")]
    pub attributes_for_faceting: Option<Option<Vec<String>>>,
}
|
||||
|
||||
// Any value that is present is considered Some value, including null.
|
||||
fn deserialize_some<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
|
||||
where T: Deserialize<'de>,
|
||||
D: Deserializer<'de>
|
||||
{
|
||||
Deserialize::deserialize(deserializer).map(Some)
|
||||
}
|
||||
|
||||
impl Settings {
|
||||
pub fn to_update(&self) -> Result<SettingsUpdate, RankingRuleConversionError> {
|
||||
let settings = self.clone();
|
||||
|
||||
let ranking_rules = match settings.ranking_rules {
|
||||
Some(Some(rules)) => UpdateState::Update(RankingRule::try_from_iter(rules.iter())?),
|
||||
Some(None) => UpdateState::Clear,
|
||||
None => UpdateState::Nothing,
|
||||
};
|
||||
|
||||
Ok(SettingsUpdate {
|
||||
ranking_rules,
|
||||
distinct_attribute: settings.distinct_attribute.into(),
|
||||
primary_key: UpdateState::Nothing,
|
||||
searchable_attributes: settings.searchable_attributes.into(),
|
||||
displayed_attributes: settings.displayed_attributes.into(),
|
||||
stop_words: settings.stop_words.into(),
|
||||
synonyms: settings.synonyms.into(),
|
||||
attributes_for_faceting: settings.attributes_for_faceting.into(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum UpdateState<T> {
|
||||
Update(T),
|
||||
Clear,
|
||||
Nothing,
|
||||
}
|
||||
|
||||
impl <T> From<Option<Option<T>>> for UpdateState<T> {
|
||||
fn from(opt: Option<Option<T>>) -> UpdateState<T> {
|
||||
match opt {
|
||||
Some(Some(t)) => UpdateState::Update(t),
|
||||
Some(None) => UpdateState::Clear,
|
||||
None => UpdateState::Nothing,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Error returned when a string cannot be parsed into a `RankingRule`.
#[derive(Debug, Clone)]
pub struct RankingRuleConversionError;

impl std::fmt::Display for RankingRuleConversionError {
    /// Writes a fixed, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.write_str("impossible to convert into RankingRule")
    }
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum RankingRule {
|
||||
Typo,
|
||||
Words,
|
||||
Proximity,
|
||||
Attribute,
|
||||
WordsPosition,
|
||||
Exactness,
|
||||
Asc(String),
|
||||
Desc(String),
|
||||
}
|
||||
|
||||
impl std::fmt::Display for RankingRule {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
RankingRule::Typo => f.write_str("typo"),
|
||||
RankingRule::Words => f.write_str("words"),
|
||||
RankingRule::Proximity => f.write_str("proximity"),
|
||||
RankingRule::Attribute => f.write_str("attribute"),
|
||||
RankingRule::WordsPosition => f.write_str("wordsPosition"),
|
||||
RankingRule::Exactness => f.write_str("exactness"),
|
||||
RankingRule::Asc(field) => write!(f, "asc({})", field),
|
||||
RankingRule::Desc(field) => write!(f, "desc({})", field),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for RankingRule {
|
||||
type Err = RankingRuleConversionError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
let rule = match s {
|
||||
"typo" => RankingRule::Typo,
|
||||
"words" => RankingRule::Words,
|
||||
"proximity" => RankingRule::Proximity,
|
||||
"attribute" => RankingRule::Attribute,
|
||||
"wordsPosition" => RankingRule::WordsPosition,
|
||||
"exactness" => RankingRule::Exactness,
|
||||
_ => {
|
||||
let captures = RANKING_RULE_REGEX.captures(s).ok_or(RankingRuleConversionError)?;
|
||||
match (captures.get(1).map(|m| m.as_str()), captures.get(2)) {
|
||||
(Some("asc"), Some(field)) => RankingRule::Asc(field.as_str().to_string()),
|
||||
(Some("desc"), Some(field)) => RankingRule::Desc(field.as_str().to_string()),
|
||||
_ => return Err(RankingRuleConversionError)
|
||||
}
|
||||
}
|
||||
};
|
||||
Ok(rule)
|
||||
}
|
||||
}
|
||||
|
||||
impl RankingRule {
|
||||
pub fn field(&self) -> Option<&str> {
|
||||
match self {
|
||||
RankingRule::Asc(field) | RankingRule::Desc(field) => Some(field),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_iter(rules: impl IntoIterator<Item = impl AsRef<str>>) -> Result<Vec<RankingRule>, RankingRuleConversionError> {
|
||||
rules.into_iter()
|
||||
.map(|s| RankingRule::from_str(s.as_ref()))
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SettingsUpdate {
|
||||
pub ranking_rules: UpdateState<Vec<RankingRule>>,
|
||||
pub distinct_attribute: UpdateState<String>,
|
||||
pub primary_key: UpdateState<String>,
|
||||
pub searchable_attributes: UpdateState<Vec<String>>,
|
||||
pub displayed_attributes: UpdateState<BTreeSet<String>>,
|
||||
pub stop_words: UpdateState<BTreeSet<String>>,
|
||||
pub synonyms: UpdateState<BTreeMap<String, Vec<String>>>,
|
||||
pub attributes_for_faceting: UpdateState<Vec<String>>,
|
||||
}
|
||||
|
||||
impl Default for SettingsUpdate {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
ranking_rules: UpdateState::Nothing,
|
||||
distinct_attribute: UpdateState::Nothing,
|
||||
primary_key: UpdateState::Nothing,
|
||||
searchable_attributes: UpdateState::Nothing,
|
||||
displayed_attributes: UpdateState::Nothing,
|
||||
stop_words: UpdateState::Nothing,
|
||||
synonyms: UpdateState::Nothing,
|
||||
attributes_for_faceting: UpdateState::Nothing,
|
||||
}
|
||||
}
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::{types::CowSlice, BytesEncode, BytesDecode};
|
||||
use sdset::{Set, SetBuf};
|
||||
use zerocopy::{AsBytes, FromBytes};
|
||||
|
||||
/// Zero-sized codec marker: encodes a `Set<T>` and decodes into a
/// `Cow<Set<T>>` (see the `BytesEncode` / `BytesDecode` impls).
pub struct CowSet<T>(std::marker::PhantomData<T>);
|
||||
|
||||
impl<'a, T: 'a> BytesEncode<'a> for CowSet<T>
|
||||
where
|
||||
T: AsBytes,
|
||||
{
|
||||
type EItem = Set<T>;
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<[u8]>> {
|
||||
CowSlice::bytes_encode(item.as_slice())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: 'a> BytesDecode<'a> for CowSet<T>
|
||||
where
|
||||
T: FromBytes + Copy,
|
||||
{
|
||||
type DItem = Cow<'a, Set<T>>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
match CowSlice::<T>::bytes_decode(bytes)? {
|
||||
Cow::Owned(vec) => Some(Cow::Owned(SetBuf::new_unchecked(vec))),
|
||||
Cow::Borrowed(slice) => Some(Cow::Borrowed(Set::new_unchecked(slice))),
|
||||
}
|
||||
}
|
||||
}
|
@ -1,43 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::Result as ZResult;
|
||||
use heed::types::{ByteSlice, OwnedType};
|
||||
|
||||
use crate::database::MainT;
|
||||
use crate::{DocumentId, FstSetCow};
|
||||
use super::BEU32;
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct DocsWords {
|
||||
pub(crate) docs_words: heed::Database<OwnedType<BEU32>, ByteSlice>,
|
||||
}
|
||||
|
||||
impl DocsWords {
|
||||
pub fn put_doc_words(
|
||||
self,
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
words: &FstSetCow,
|
||||
) -> ZResult<()> {
|
||||
let document_id = BEU32::new(document_id.0);
|
||||
let bytes = words.as_fst().as_bytes();
|
||||
self.docs_words.put(writer, &document_id, bytes)
|
||||
}
|
||||
|
||||
pub fn del_doc_words(self, writer: &mut heed::RwTxn<MainT>, document_id: DocumentId) -> ZResult<bool> {
|
||||
let document_id = BEU32::new(document_id.0);
|
||||
self.docs_words.delete(writer, &document_id)
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
|
||||
self.docs_words.clear(writer)
|
||||
}
|
||||
|
||||
pub fn doc_words<'a>(self, reader: &'a heed::RoTxn<'a, MainT>, document_id: DocumentId) -> ZResult<FstSetCow> {
|
||||
let document_id = BEU32::new(document_id.0);
|
||||
match self.docs_words.get(reader, &document_id)? {
|
||||
Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
|
||||
None => Ok(fst::Set::default().map_data(Cow::Owned).unwrap()),
|
||||
}
|
||||
}
|
||||
}
|
@ -1,79 +0,0 @@
|
||||
use heed::types::{ByteSlice, OwnedType};
|
||||
use crate::database::MainT;
|
||||
use heed::Result as ZResult;
|
||||
use meilisearch_schema::FieldId;
|
||||
|
||||
use super::DocumentFieldStoredKey;
|
||||
use crate::DocumentId;
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct DocumentsFields {
|
||||
pub(crate) documents_fields: heed::Database<OwnedType<DocumentFieldStoredKey>, ByteSlice>,
|
||||
}
|
||||
|
||||
impl DocumentsFields {
|
||||
pub fn put_document_field(
|
||||
self,
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
field: FieldId,
|
||||
value: &[u8],
|
||||
) -> ZResult<()> {
|
||||
let key = DocumentFieldStoredKey::new(document_id, field);
|
||||
self.documents_fields.put(writer, &key, value)
|
||||
}
|
||||
|
||||
pub fn del_all_document_fields(
|
||||
self,
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
) -> ZResult<usize> {
|
||||
let start = DocumentFieldStoredKey::new(document_id, FieldId::min());
|
||||
let end = DocumentFieldStoredKey::new(document_id, FieldId::max());
|
||||
self.documents_fields.delete_range(writer, &(start..=end))
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
|
||||
self.documents_fields.clear(writer)
|
||||
}
|
||||
|
||||
pub fn document_attribute<'txn>(
|
||||
self,
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
field: FieldId,
|
||||
) -> ZResult<Option<&'txn [u8]>> {
|
||||
let key = DocumentFieldStoredKey::new(document_id, field);
|
||||
self.documents_fields.get(reader, &key)
|
||||
}
|
||||
|
||||
pub fn document_fields<'txn>(
|
||||
self,
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
) -> ZResult<DocumentFieldsIter<'txn>> {
|
||||
let start = DocumentFieldStoredKey::new(document_id, FieldId::min());
|
||||
let end = DocumentFieldStoredKey::new(document_id, FieldId::max());
|
||||
let iter = self.documents_fields.range(reader, &(start..=end))?;
|
||||
Ok(DocumentFieldsIter { iter })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocumentFieldsIter<'txn> {
|
||||
iter: heed::RoRange<'txn, OwnedType<DocumentFieldStoredKey>, ByteSlice>,
|
||||
}
|
||||
|
||||
impl<'txn> Iterator for DocumentFieldsIter<'txn> {
|
||||
type Item = ZResult<(FieldId, &'txn [u8])>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.iter.next() {
|
||||
Some(Ok((key, bytes))) => {
|
||||
let field_id = FieldId(key.field_id.get());
|
||||
Some(Ok((field_id, bytes)))
|
||||
}
|
||||
Some(Err(e)) => Some(Err(e)),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
@ -1,143 +0,0 @@
|
||||
use super::DocumentFieldIndexedKey;
|
||||
use crate::database::MainT;
|
||||
use crate::DocumentId;
|
||||
use heed::types::OwnedType;
|
||||
use heed::Result as ZResult;
|
||||
use meilisearch_schema::IndexedPos;
|
||||
use crate::MResult;
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct DocumentsFieldsCounts {
|
||||
pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
|
||||
}
|
||||
|
||||
impl DocumentsFieldsCounts {
|
||||
pub fn put_document_field_count(
|
||||
self,
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
attribute: IndexedPos,
|
||||
value: u16,
|
||||
) -> ZResult<()> {
|
||||
let key = DocumentFieldIndexedKey::new(document_id, attribute);
|
||||
self.documents_fields_counts.put(writer, &key, &value)
|
||||
}
|
||||
|
||||
pub fn del_all_document_fields_counts(
|
||||
self,
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
) -> ZResult<usize> {
|
||||
let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min());
|
||||
let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max());
|
||||
self.documents_fields_counts.delete_range(writer, &(start..=end))
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
|
||||
self.documents_fields_counts.clear(writer)
|
||||
}
|
||||
|
||||
pub fn document_field_count(
|
||||
self,
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
attribute: IndexedPos,
|
||||
) -> ZResult<Option<u16>> {
|
||||
let key = DocumentFieldIndexedKey::new(document_id, attribute);
|
||||
match self.documents_fields_counts.get(reader, &key)? {
|
||||
Some(count) => Ok(Some(count)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn document_fields_counts<'txn>(
|
||||
self,
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
) -> ZResult<DocumentFieldsCountsIter<'txn>> {
|
||||
let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min());
|
||||
let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max());
|
||||
let iter = self.documents_fields_counts.range(reader, &(start..=end))?;
|
||||
Ok(DocumentFieldsCountsIter { iter })
|
||||
}
|
||||
|
||||
pub fn documents_ids<'txn>(self, reader: &'txn heed::RoTxn<MainT>) -> MResult<DocumentsIdsIter<'txn>> {
|
||||
let iter = self.documents_fields_counts.iter(reader)?;
|
||||
Ok(DocumentsIdsIter {
|
||||
last_seen_id: None,
|
||||
iter,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn all_documents_fields_counts<'txn>(
|
||||
self,
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
) -> ZResult<AllDocumentsFieldsCountsIter<'txn>> {
|
||||
let iter = self.documents_fields_counts.iter(reader)?;
|
||||
Ok(AllDocumentsFieldsCountsIter { iter })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocumentFieldsCountsIter<'txn> {
|
||||
iter: heed::RoRange<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
|
||||
}
|
||||
|
||||
impl Iterator for DocumentFieldsCountsIter<'_> {
|
||||
type Item = ZResult<(IndexedPos, u16)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.iter.next() {
|
||||
Some(Ok((key, count))) => {
|
||||
let indexed_pos = IndexedPos(key.indexed_pos.get());
|
||||
Some(Ok((indexed_pos, count)))
|
||||
}
|
||||
Some(Err(e)) => Some(Err(e)),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocumentsIdsIter<'txn> {
|
||||
last_seen_id: Option<DocumentId>,
|
||||
iter: heed::RoIter<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
|
||||
}
|
||||
|
||||
impl Iterator for DocumentsIdsIter<'_> {
|
||||
type Item = MResult<DocumentId>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
for result in &mut self.iter {
|
||||
match result {
|
||||
Ok((key, _)) => {
|
||||
let document_id = DocumentId(key.docid.get());
|
||||
if Some(document_id) != self.last_seen_id {
|
||||
self.last_seen_id = Some(document_id);
|
||||
return Some(Ok(document_id));
|
||||
}
|
||||
}
|
||||
Err(e) => return Some(Err(e.into())),
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub struct AllDocumentsFieldsCountsIter<'txn> {
|
||||
iter: heed::RoIter<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
|
||||
}
|
||||
|
||||
impl Iterator for AllDocumentsFieldsCountsIter<'_> {
|
||||
type Item = ZResult<(DocumentId, IndexedPos, u16)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.iter.next() {
|
||||
Some(Ok((key, count))) => {
|
||||
let docid = DocumentId(key.docid.get());
|
||||
let indexed_pos = IndexedPos(key.indexed_pos.get());
|
||||
Some(Ok((docid, indexed_pos, count)))
|
||||
}
|
||||
Some(Err(e)) => Some(Err(e)),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
@ -1,75 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use sdset::Set;
|
||||
|
||||
use crate::DocumentId;
|
||||
use super::cow_set::CowSet;
|
||||
|
||||
/// Codec marker for a set of document ids; encoding and decoding are
/// delegated to `CowSet`.
pub struct DocumentsIds;
|
||||
|
||||
impl BytesEncode<'_> for DocumentsIds {
|
||||
type EItem = Set<DocumentId>;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
CowSet::bytes_encode(item)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for DocumentsIds {
|
||||
type DItem = Cow<'a, Set<DocumentId>>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
CowSet::bytes_decode(bytes)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DiscoverIds<'a> {
|
||||
ids_iter: std::slice::Iter<'a, DocumentId>,
|
||||
left_id: Option<u32>,
|
||||
right_id: Option<u32>,
|
||||
available_range: std::ops::Range<u32>,
|
||||
}
|
||||
|
||||
impl DiscoverIds<'_> {
|
||||
pub fn new(ids: &Set<DocumentId>) -> DiscoverIds {
|
||||
let mut ids_iter = ids.iter();
|
||||
let right_id = ids_iter.next().map(|id| id.0);
|
||||
let available_range = 0..right_id.unwrap_or(u32::max_value());
|
||||
DiscoverIds { ids_iter, left_id: None, right_id, available_range }
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for DiscoverIds<'_> {
|
||||
type Item = DocumentId;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
match self.available_range.next() {
|
||||
// The available range gives us a new id, we return it.
|
||||
Some(id) => return Some(DocumentId(id)),
|
||||
// The available range is exhausted, we need to find the next one.
|
||||
None if self.available_range.end == u32::max_value() => return None,
|
||||
None => loop {
|
||||
self.left_id = self.right_id.take();
|
||||
self.right_id = self.ids_iter.next().map(|id| id.0);
|
||||
match (self.left_id, self.right_id) {
|
||||
// We found a gap in the used ids, we can yield all ids
|
||||
// until the end of the gap
|
||||
(Some(l), Some(r)) => if l.saturating_add(1) != r {
|
||||
self.available_range = (l + 1)..r;
|
||||
break;
|
||||
},
|
||||
// The last used id has been reached, we can use all ids
|
||||
// until u32 MAX
|
||||
(Some(l), None) => {
|
||||
self.available_range = l.saturating_add(1)..u32::max_value();
|
||||
break;
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,97 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::mem;
|
||||
|
||||
use heed::{RwTxn, RoTxn, RoPrefix, types::Str, BytesEncode, BytesDecode};
|
||||
use sdset::{SetBuf, Set, SetOperation};
|
||||
|
||||
use meilisearch_types::DocumentId;
|
||||
use meilisearch_schema::FieldId;
|
||||
|
||||
use crate::MResult;
|
||||
use crate::database::MainT;
|
||||
use crate::facets::FacetKey;
|
||||
use super::cow_set::CowSet;
|
||||
|
||||
/// contains facet info
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct Facets {
|
||||
pub(crate) facets: heed::Database<FacetKey, FacetData>,
|
||||
}
|
||||
|
||||
/// Codec marker for the values of the facets store: a string together with a
/// set of document ids.
pub struct FacetData;
|
||||
|
||||
impl<'a> BytesEncode<'a> for FacetData {
|
||||
type EItem = (&'a str, &'a Set<DocumentId>);
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
// get size of the first item
|
||||
let first_size = item.0.as_bytes().len();
|
||||
let size = mem::size_of::<u64>()
|
||||
+ first_size
|
||||
+ item.1.len() * mem::size_of::<DocumentId>();
|
||||
let mut buffer = Vec::with_capacity(size);
|
||||
// encode the length of the first item
|
||||
buffer.extend_from_slice(&first_size.to_be_bytes());
|
||||
buffer.extend_from_slice(Str::bytes_encode(&item.0)?.as_ref());
|
||||
let second_slice = CowSet::bytes_encode(&item.1)?;
|
||||
buffer.extend_from_slice(second_slice.as_ref());
|
||||
Some(Cow::Owned(buffer))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for FacetData {
|
||||
type DItem = (&'a str, Cow<'a, Set<DocumentId>>);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
const LEN: usize = mem::size_of::<u64>();
|
||||
let mut size_buf = [0; LEN];
|
||||
size_buf.copy_from_slice(bytes.get(0..LEN)?);
|
||||
// decode size of the first item from the bytes
|
||||
let first_size = u64::from_be_bytes(size_buf);
|
||||
// decode first and second items
|
||||
let first_item = Str::bytes_decode(bytes.get(LEN..(LEN + first_size as usize))?)?;
|
||||
let second_item = CowSet::bytes_decode(bytes.get((LEN + first_size as usize)..)?)?;
|
||||
Some((first_item, second_item))
|
||||
}
|
||||
}
|
||||
|
||||
impl Facets {
|
||||
// we use sdset::SetBuf to ensure the docids are sorted.
|
||||
pub fn put_facet_document_ids(&self, writer: &mut RwTxn<MainT>, facet_key: FacetKey, doc_ids: &Set<DocumentId>, facet_value: &str) -> MResult<()> {
|
||||
Ok(self.facets.put(writer, &facet_key, &(facet_value, doc_ids))?)
|
||||
}
|
||||
|
||||
pub fn field_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, field_id: FieldId) -> MResult<RoPrefix<'txn, FacetKey, FacetData>> {
|
||||
Ok(self.facets.prefix_iter(reader, &FacetKey::new(field_id, String::new()))?)
|
||||
}
|
||||
|
||||
pub fn facet_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, facet_key: &FacetKey) -> MResult<Option<(&'txn str,Cow<'txn, Set<DocumentId>>)>> {
|
||||
Ok(self.facets.get(reader, &facet_key)?)
|
||||
}
|
||||
|
||||
/// updates the facets store, revmoving the documents from the facets provided in the
|
||||
/// `facet_map` argument
|
||||
pub fn remove(&self, writer: &mut RwTxn<MainT>, facet_map: HashMap<FacetKey, (String, Vec<DocumentId>)>) -> MResult<()> {
|
||||
for (key, (name, document_ids)) in facet_map {
|
||||
if let Some((_, old)) = self.facets.get(writer, &key)? {
|
||||
let to_remove = SetBuf::from_dirty(document_ids);
|
||||
let new = sdset::duo::OpBuilder::new(old.as_ref(), to_remove.as_set()).difference().into_set_buf();
|
||||
self.facets.put(writer, &key, &(&name, new.as_set()))?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add(&self, writer: &mut RwTxn<MainT>, facet_map: HashMap<FacetKey, (String, Vec<DocumentId>)>) -> MResult<()> {
|
||||
for (key, (facet_name, document_ids)) in facet_map {
|
||||
let set = SetBuf::from_dirty(document_ids);
|
||||
self.put_facet_document_ids(writer, key, set.as_set(), &facet_name)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> MResult<()> {
|
||||
Ok(self.facets.clear(writer)?)
|
||||
}
|
||||
}
|
@ -1,320 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str, CowSlice};
|
||||
use meilisearch_schema::{FieldId, Schema};
|
||||
use meilisearch_types::DocumentId;
|
||||
use sdset::Set;
|
||||
|
||||
use crate::database::MainT;
|
||||
use crate::{RankedMap, MResult};
|
||||
use crate::settings::RankingRule;
|
||||
use crate::{FstSetCow, FstMapCow};
|
||||
use super::{CowSet, DocumentsIds};
|
||||
|
||||
const ATTRIBUTES_FOR_FACETING_KEY: &str = "attributes-for-faceting";
|
||||
const CREATED_AT_KEY: &str = "created-at";
|
||||
const CUSTOMS_KEY: &str = "customs";
|
||||
const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute";
|
||||
const EXTERNAL_DOCIDS_KEY: &str = "external-docids";
|
||||
const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution";
|
||||
const INTERNAL_DOCIDS_KEY: &str = "internal-docids";
|
||||
const NAME_KEY: &str = "name";
|
||||
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
|
||||
const RANKED_MAP_KEY: &str = "ranked-map";
|
||||
const RANKING_RULES_KEY: &str = "ranking-rules";
|
||||
const SCHEMA_KEY: &str = "schema";
|
||||
const SORTED_DOCUMENT_IDS_CACHE_KEY: &str = "sorted-document-ids-cache";
|
||||
const STOP_WORDS_KEY: &str = "stop-words";
|
||||
const SYNONYMS_KEY: &str = "synonyms";
|
||||
const UPDATED_AT_KEY: &str = "updated-at";
|
||||
const WORDS_KEY: &str = "words";
|
||||
|
||||
pub type FreqsMap = BTreeMap<String, usize>;
|
||||
type SerdeFreqsMap = SerdeBincode<FreqsMap>;
|
||||
type SerdeDatetime = SerdeBincode<DateTime<Utc>>;
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Main {
|
||||
pub(crate) main: heed::PolyDatabase,
|
||||
}
|
||||
|
||||
impl Main {
|
||||
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> MResult<()> {
|
||||
Ok(self.main.clear(writer)?)
|
||||
}
|
||||
|
||||
pub fn put_name(self, writer: &mut heed::RwTxn<MainT>, name: &str) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, Str>(writer, NAME_KEY, name)?)
|
||||
}
|
||||
|
||||
pub fn name(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<String>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, Str>(reader, NAME_KEY)?
|
||||
.map(|name| name.to_owned()))
|
||||
}
|
||||
|
||||
pub fn put_created_at(self, writer: &mut heed::RwTxn<MainT>) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, SerdeDatetime>(writer, CREATED_AT_KEY, &Utc::now())?)
|
||||
}
|
||||
|
||||
pub fn created_at(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<DateTime<Utc>>> {
|
||||
Ok(self.main.get::<_, Str, SerdeDatetime>(reader, CREATED_AT_KEY)?)
|
||||
}
|
||||
|
||||
pub fn put_updated_at(self, writer: &mut heed::RwTxn<MainT>) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, SerdeDatetime>(writer, UPDATED_AT_KEY, &Utc::now())?)
|
||||
}
|
||||
|
||||
pub fn updated_at(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<DateTime<Utc>>> {
|
||||
Ok(self.main.get::<_, Str, SerdeDatetime>(reader, UPDATED_AT_KEY)?)
|
||||
}
|
||||
|
||||
pub fn put_internal_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &sdset::Set<DocumentId>) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, DocumentsIds>(writer, INTERNAL_DOCIDS_KEY, ids)?)
|
||||
}
|
||||
|
||||
pub fn internal_docids<'txn>(self, reader: &'txn heed::RoTxn<MainT>) -> MResult<Cow<'txn, sdset::Set<DocumentId>>> {
|
||||
match self.main.get::<_, Str, DocumentsIds>(reader, INTERNAL_DOCIDS_KEY)? {
|
||||
Some(ids) => Ok(ids),
|
||||
None => Ok(Cow::default()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn merge_internal_docids(self, writer: &mut heed::RwTxn<MainT>, new_ids: &sdset::Set<DocumentId>) -> MResult<()> {
|
||||
use sdset::SetOperation;
|
||||
|
||||
// We do a union of the old and new internal ids.
|
||||
let internal_docids = self.internal_docids(writer)?;
|
||||
let internal_docids = sdset::duo::Union::new(&internal_docids, new_ids).into_set_buf();
|
||||
Ok(self.put_internal_docids(writer, &internal_docids)?)
|
||||
}
|
||||
|
||||
pub fn remove_internal_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &sdset::Set<DocumentId>) -> MResult<()> {
|
||||
use sdset::SetOperation;
|
||||
|
||||
// We do a difference of the old and new internal ids.
|
||||
let internal_docids = self.internal_docids(writer)?;
|
||||
let internal_docids = sdset::duo::Difference::new(&internal_docids, ids).into_set_buf();
|
||||
Ok(self.put_internal_docids(writer, &internal_docids)?)
|
||||
}
|
||||
|
||||
pub fn put_external_docids<A>(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map<A>) -> MResult<()>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
Ok(self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, ids.as_fst().as_bytes())?)
|
||||
}
|
||||
|
||||
pub fn merge_external_docids<A>(self, writer: &mut heed::RwTxn<MainT>, new_docids: &fst::Map<A>) -> MResult<()>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
use fst::{Streamer, IntoStreamer};
|
||||
|
||||
// Do a union of the old and the new set of external docids.
|
||||
let external_docids = self.external_docids(writer)?;
|
||||
let mut op = external_docids.op().add(new_docids.into_stream()).r#union();
|
||||
let mut build = fst::MapBuilder::memory();
|
||||
while let Some((docid, values)) = op.next() {
|
||||
build.insert(docid, values[0].value).unwrap();
|
||||
}
|
||||
drop(op);
|
||||
|
||||
let external_docids = build.into_map();
|
||||
Ok(self.put_external_docids(writer, &external_docids)?)
|
||||
}
|
||||
|
||||
pub fn remove_external_docids<A>(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map<A>) -> MResult<()>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
use fst::{Streamer, IntoStreamer};
|
||||
|
||||
// Do a difference between the old set of external docids and the ids to remove.
|
||||
let external_docids = self.external_docids(writer)?;
|
||||
let mut op = external_docids.op().add(ids.into_stream()).difference();
|
||||
let mut build = fst::MapBuilder::memory();
|
||||
while let Some((docid, values)) = op.next() {
|
||||
build.insert(docid, values[0].value).unwrap();
|
||||
}
|
||||
drop(op);
|
||||
|
||||
let external_docids = build.into_map();
|
||||
self.put_external_docids(writer, &external_docids)
|
||||
}
|
||||
|
||||
pub fn external_docids<'a>(self, reader: &'a heed::RoTxn<'a, MainT>) -> MResult<FstMapCow> {
|
||||
match self.main.get::<_, Str, ByteSlice>(reader, EXTERNAL_DOCIDS_KEY)? {
|
||||
Some(bytes) => Ok(fst::Map::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
|
||||
None => Ok(fst::Map::default().map_data(Cow::Owned).unwrap()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn external_to_internal_docid(self, reader: &heed::RoTxn<MainT>, external_docid: &str) -> MResult<Option<DocumentId>> {
|
||||
let external_ids = self.external_docids(reader)?;
|
||||
Ok(external_ids.get(external_docid).map(|id| DocumentId(id as u32)))
|
||||
}
|
||||
|
||||
pub fn words_fst<'a>(self, reader: &'a heed::RoTxn<'a, MainT>) -> MResult<FstSetCow> {
|
||||
match self.main.get::<_, Str, ByteSlice>(reader, WORDS_KEY)? {
|
||||
Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
|
||||
None => Ok(fst::Set::default().map_data(Cow::Owned).unwrap()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn put_words_fst<A: AsRef<[u8]>>(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set<A>) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, ByteSlice>(writer, WORDS_KEY, fst.as_fst().as_bytes())?)
|
||||
}
|
||||
|
||||
pub fn put_sorted_document_ids_cache(self, writer: &mut heed::RwTxn<MainT>, documents_ids: &[DocumentId]) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, CowSlice<DocumentId>>(writer, SORTED_DOCUMENT_IDS_CACHE_KEY, documents_ids)?)
|
||||
}
|
||||
|
||||
pub fn sorted_document_ids_cache<'a>(self, reader: &'a heed::RoTxn<'a, MainT>) -> MResult<Option<Cow<[DocumentId]>>> {
|
||||
Ok(self.main.get::<_, Str, CowSlice<DocumentId>>(reader, SORTED_DOCUMENT_IDS_CACHE_KEY)?)
|
||||
}
|
||||
|
||||
pub fn put_schema(self, writer: &mut heed::RwTxn<MainT>, schema: &Schema) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, SerdeBincode<Schema>>(writer, SCHEMA_KEY, schema)?)
|
||||
}
|
||||
|
||||
pub fn schema(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<Schema>> {
|
||||
Ok(self.main.get::<_, Str, SerdeBincode<Schema>>(reader, SCHEMA_KEY)?)
|
||||
}
|
||||
|
||||
pub fn delete_schema(self, writer: &mut heed::RwTxn<MainT>) -> MResult<bool> {
|
||||
Ok(self.main.delete::<_, Str>(writer, SCHEMA_KEY)?)
|
||||
}
|
||||
|
||||
pub fn put_ranked_map(self, writer: &mut heed::RwTxn<MainT>, ranked_map: &RankedMap) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, SerdeBincode<RankedMap>>(writer, RANKED_MAP_KEY, &ranked_map)?)
|
||||
}
|
||||
|
||||
pub fn ranked_map(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<RankedMap>> {
|
||||
Ok(self.main.get::<_, Str, SerdeBincode<RankedMap>>(reader, RANKED_MAP_KEY)?)
|
||||
}
|
||||
|
||||
pub fn put_synonyms_fst<A: AsRef<[u8]>>(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set<A>) -> MResult<()> {
|
||||
let bytes = fst.as_fst().as_bytes();
|
||||
Ok(self.main.put::<_, Str, ByteSlice>(writer, SYNONYMS_KEY, bytes)?)
|
||||
}
|
||||
|
||||
pub(crate) fn synonyms_fst<'a>(self, reader: &'a heed::RoTxn<'a, MainT>) -> MResult<FstSetCow> {
|
||||
match self.main.get::<_, Str, ByteSlice>(reader, SYNONYMS_KEY)? {
|
||||
Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
|
||||
None => Ok(fst::Set::default().map_data(Cow::Owned).unwrap()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn synonyms(self, reader: &heed::RoTxn<MainT>) -> MResult<Vec<String>> {
|
||||
let synonyms = self
|
||||
.synonyms_fst(&reader)?
|
||||
.stream()
|
||||
.into_strs()?;
|
||||
Ok(synonyms)
|
||||
}
|
||||
|
||||
pub fn put_stop_words_fst<A: AsRef<[u8]>>(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set<A>) -> MResult<()> {
|
||||
let bytes = fst.as_fst().as_bytes();
|
||||
Ok(self.main.put::<_, Str, ByteSlice>(writer, STOP_WORDS_KEY, bytes)?)
|
||||
}
|
||||
|
||||
pub(crate) fn stop_words_fst<'a>(self, reader: &'a heed::RoTxn<'a, MainT>) -> MResult<FstSetCow> {
|
||||
match self.main.get::<_, Str, ByteSlice>(reader, STOP_WORDS_KEY)? {
|
||||
Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
|
||||
None => Ok(fst::Set::default().map_data(Cow::Owned).unwrap()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns every key of the stored stop-words FST as a `String`.
pub fn stop_words(self, reader: &heed::RoTxn<MainT>) -> MResult<Vec<String>> {
    let set = self.stop_words_fst(reader)?;
    let words = set.stream().into_strs()?;
    Ok(words)
}
|
||||
|
||||
pub fn put_number_of_documents<F>(self, writer: &mut heed::RwTxn<MainT>, f: F) -> MResult<u64>
|
||||
where
|
||||
F: Fn(u64) -> u64,
|
||||
{
|
||||
let new = self.number_of_documents(&*writer).map(f)?;
|
||||
self.main
|
||||
.put::<_, Str, OwnedType<u64>>(writer, NUMBER_OF_DOCUMENTS_KEY, &new)?;
|
||||
Ok(new)
|
||||
}
|
||||
|
||||
pub fn number_of_documents(self, reader: &heed::RoTxn<MainT>) -> MResult<u64> {
|
||||
match self
|
||||
.main
|
||||
.get::<_, Str, OwnedType<u64>>(reader, NUMBER_OF_DOCUMENTS_KEY)? {
|
||||
Some(value) => Ok(value),
|
||||
None => Ok(0),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn put_fields_distribution(
|
||||
self,
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
fields_frequency: &FreqsMap,
|
||||
) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, SerdeFreqsMap>(writer, FIELDS_DISTRIBUTION_KEY, fields_frequency)?)
|
||||
}
|
||||
|
||||
pub fn fields_distribution(&self, reader: &heed::RoTxn<MainT>) -> MResult<Option<FreqsMap>> {
|
||||
match self
|
||||
.main
|
||||
.get::<_, Str, SerdeFreqsMap>(reader, FIELDS_DISTRIBUTION_KEY)?
|
||||
{
|
||||
Some(freqs) => Ok(Some(freqs)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn attributes_for_faceting<'txn>(&self, reader: &'txn heed::RoTxn<MainT>) -> MResult<Option<Cow<'txn, Set<FieldId>>>> {
|
||||
Ok(self.main.get::<_, Str, CowSet<FieldId>>(reader, ATTRIBUTES_FOR_FACETING_KEY)?)
|
||||
}
|
||||
|
||||
pub fn put_attributes_for_faceting(self, writer: &mut heed::RwTxn<MainT>, attributes: &Set<FieldId>) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, CowSet<FieldId>>(writer, ATTRIBUTES_FOR_FACETING_KEY, attributes)?)
|
||||
}
|
||||
|
||||
pub fn delete_attributes_for_faceting(self, writer: &mut heed::RwTxn<MainT>) -> MResult<bool> {
|
||||
Ok(self.main.delete::<_, Str>(writer, ATTRIBUTES_FOR_FACETING_KEY)?)
|
||||
}
|
||||
|
||||
pub fn ranking_rules(&self, reader: &heed::RoTxn<MainT>) -> MResult<Option<Vec<RankingRule>>> {
|
||||
Ok(self.main.get::<_, Str, SerdeBincode<Vec<RankingRule>>>(reader, RANKING_RULES_KEY)?)
|
||||
}
|
||||
|
||||
pub fn put_ranking_rules(self, writer: &mut heed::RwTxn<MainT>, value: &[RankingRule]) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, SerdeBincode<Vec<RankingRule>>>(writer, RANKING_RULES_KEY, &value.to_vec())?)
|
||||
}
|
||||
|
||||
pub fn delete_ranking_rules(self, writer: &mut heed::RwTxn<MainT>) -> MResult<bool> {
|
||||
Ok(self.main.delete::<_, Str>(writer, RANKING_RULES_KEY)?)
|
||||
}
|
||||
|
||||
pub fn distinct_attribute(&self, reader: &heed::RoTxn<MainT>) -> MResult<Option<FieldId>> {
|
||||
match self.main.get::<_, Str, OwnedType<u16>>(reader, DISTINCT_ATTRIBUTE_KEY)? {
|
||||
Some(value) => Ok(Some(FieldId(value.to_owned()))),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn put_distinct_attribute(self, writer: &mut heed::RwTxn<MainT>, value: FieldId) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, OwnedType<u16>>(writer, DISTINCT_ATTRIBUTE_KEY, &value.0)?)
|
||||
}
|
||||
|
||||
pub fn delete_distinct_attribute(self, writer: &mut heed::RwTxn<MainT>) -> MResult<bool> {
|
||||
Ok(self.main.delete::<_, Str>(writer, DISTINCT_ATTRIBUTE_KEY)?)
|
||||
}
|
||||
|
||||
pub fn put_customs(self, writer: &mut heed::RwTxn<MainT>, customs: &[u8]) -> MResult<()> {
|
||||
Ok(self.main.put::<_, Str, ByteSlice>(writer, CUSTOMS_KEY, customs)?)
|
||||
}
|
||||
|
||||
pub fn customs<'txn>(self, reader: &'txn heed::RoTxn<MainT>) -> MResult<Option<&'txn [u8]>> {
|
||||
Ok(self.main.get::<_, Str, ByteSlice>(reader, CUSTOMS_KEY)?)
|
||||
}
|
||||
}
|
@ -1,522 +0,0 @@
|
||||
mod cow_set;
|
||||
mod docs_words;
|
||||
mod documents_ids;
|
||||
mod documents_fields;
|
||||
mod documents_fields_counts;
|
||||
mod facets;
|
||||
mod main;
|
||||
mod postings_lists;
|
||||
mod prefix_documents_cache;
|
||||
mod prefix_postings_lists_cache;
|
||||
mod synonyms;
|
||||
mod updates;
|
||||
mod updates_results;
|
||||
|
||||
pub use self::cow_set::CowSet;
|
||||
pub use self::docs_words::DocsWords;
|
||||
pub use self::documents_fields::{DocumentFieldsIter, DocumentsFields};
|
||||
pub use self::documents_fields_counts::{DocumentFieldsCountsIter, DocumentsFieldsCounts, DocumentsIdsIter};
|
||||
pub use self::documents_ids::{DocumentsIds, DiscoverIds};
|
||||
pub use self::facets::Facets;
|
||||
pub use self::main::Main;
|
||||
pub use self::postings_lists::PostingsLists;
|
||||
pub use self::prefix_documents_cache::PrefixDocumentsCache;
|
||||
pub use self::prefix_postings_lists_cache::PrefixPostingsListsCache;
|
||||
pub use self::synonyms::Synonyms;
|
||||
pub use self::updates::Updates;
|
||||
pub use self::updates_results::UpdatesResults;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashSet;
|
||||
use std::convert::TryInto;
|
||||
use std::{mem, ptr};
|
||||
|
||||
use heed::{BytesEncode, BytesDecode};
|
||||
use meilisearch_schema::{IndexedPos, FieldId};
|
||||
use sdset::{Set, SetBuf};
|
||||
use serde::de::{self, Deserialize};
|
||||
use zerocopy::{AsBytes, FromBytes};
|
||||
|
||||
use crate::criterion::Criteria;
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::serde::Deserializer;
|
||||
use crate::settings::SettingsUpdate;
|
||||
use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MResult};
|
||||
|
||||
type BEU32 = zerocopy::U32<byteorder::BigEndian>;
|
||||
type BEU64 = zerocopy::U64<byteorder::BigEndian>;
|
||||
pub type BEU16 = zerocopy::U16<byteorder::BigEndian>;
|
||||
|
||||
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
||||
#[repr(C)]
|
||||
pub struct DocumentFieldIndexedKey {
|
||||
docid: BEU32,
|
||||
indexed_pos: BEU16,
|
||||
}
|
||||
|
||||
impl DocumentFieldIndexedKey {
|
||||
fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentFieldIndexedKey {
|
||||
DocumentFieldIndexedKey {
|
||||
docid: BEU32::new(docid.0),
|
||||
indexed_pos: BEU16::new(indexed_pos.0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
||||
#[repr(C)]
|
||||
pub struct DocumentFieldStoredKey {
|
||||
docid: BEU32,
|
||||
field_id: BEU16,
|
||||
}
|
||||
|
||||
impl DocumentFieldStoredKey {
|
||||
fn new(docid: DocumentId, field_id: FieldId) -> DocumentFieldStoredKey {
|
||||
DocumentFieldStoredKey {
|
||||
docid: BEU32::new(docid.0),
|
||||
field_id: BEU16::new(field_id.0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Debug)]
|
||||
pub struct Postings<'a> {
|
||||
pub docids: Cow<'a, Set<DocumentId>>,
|
||||
pub matches: Cow<'a, Set<DocIndex>>,
|
||||
}
|
||||
|
||||
pub struct PostingsCodec;
|
||||
|
||||
impl<'a> BytesEncode<'a> for PostingsCodec {
|
||||
type EItem = Postings<'a>;
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let u64_size = mem::size_of::<u64>();
|
||||
let docids_size = item.docids.len() * mem::size_of::<DocumentId>();
|
||||
let matches_size = item.matches.len() * mem::size_of::<DocIndex>();
|
||||
|
||||
let mut buffer = Vec::with_capacity(u64_size + docids_size + matches_size);
|
||||
|
||||
let docids_len = item.docids.len() as u64;
|
||||
buffer.extend_from_slice(&docids_len.to_be_bytes());
|
||||
buffer.extend_from_slice(item.docids.as_bytes());
|
||||
buffer.extend_from_slice(item.matches.as_bytes());
|
||||
|
||||
Some(Cow::Owned(buffer))
|
||||
}
|
||||
}
|
||||
|
||||
/// Tells whether the address of the first byte of `bytes` is a multiple of `align`.
fn aligned_to(bytes: &[u8], align: usize) -> bool {
    let address = bytes.as_ptr() as usize;
    address % align == 0
}
|
||||
|
||||
fn from_bytes_to_set<'a, T: 'a>(bytes: &'a [u8]) -> Option<Cow<'a, Set<T>>>
|
||||
where T: Clone + FromBytes
|
||||
{
|
||||
match zerocopy::LayoutVerified::<_, [T]>::new_slice(bytes) {
|
||||
Some(layout) => Some(Cow::Borrowed(Set::new_unchecked(layout.into_slice()))),
|
||||
None => {
|
||||
let len = bytes.len();
|
||||
let elem_size = mem::size_of::<T>();
|
||||
|
||||
// ensure that it is the alignment that is wrong
|
||||
// and the length is valid
|
||||
if len % elem_size == 0 && !aligned_to(bytes, mem::align_of::<T>()) {
|
||||
let elems = len / elem_size;
|
||||
let mut vec = Vec::<T>::with_capacity(elems);
|
||||
|
||||
unsafe {
|
||||
let dst = vec.as_mut_ptr() as *mut u8;
|
||||
ptr::copy_nonoverlapping(bytes.as_ptr(), dst, len);
|
||||
vec.set_len(elems);
|
||||
}
|
||||
|
||||
return Some(Cow::Owned(SetBuf::new_unchecked(vec)));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for PostingsCodec {
|
||||
type DItem = Postings<'a>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let u64_size = mem::size_of::<u64>();
|
||||
let docid_size = mem::size_of::<DocumentId>();
|
||||
|
||||
let (len_bytes, bytes) = bytes.split_at(u64_size);
|
||||
let docids_len = len_bytes.try_into().ok().map(u64::from_be_bytes)? as usize;
|
||||
let docids_size = docids_len * docid_size;
|
||||
|
||||
let docids_bytes = &bytes[..docids_size];
|
||||
let matches_bytes = &bytes[docids_size..];
|
||||
|
||||
let docids = from_bytes_to_set(docids_bytes)?;
|
||||
let matches = from_bytes_to_set(matches_bytes)?;
|
||||
|
||||
Some(Postings { docids, matches })
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the name of the main store for the index called `name`.
fn main_name(name: &str) -> String {
    ["store-", name].concat()
}
|
||||
|
||||
/// Builds the name of the postings-lists store for the index called `name`.
fn postings_lists_name(name: &str) -> String {
    ["store-", name, "-postings-lists"].concat()
}
|
||||
|
||||
/// Builds the name of the documents-fields store for the index called `name`.
fn documents_fields_name(name: &str) -> String {
    ["store-", name, "-documents-fields"].concat()
}
|
||||
|
||||
/// Builds the name of the documents-fields-counts store for the index called `name`.
fn documents_fields_counts_name(name: &str) -> String {
    ["store-", name, "-documents-fields-counts"].concat()
}
|
||||
|
||||
/// Builds the name of the synonyms store for the index called `name`.
fn synonyms_name(name: &str) -> String {
    ["store-", name, "-synonyms"].concat()
}
|
||||
|
||||
/// Builds the name of the docs-words store for the index called `name`.
fn docs_words_name(name: &str) -> String {
    ["store-", name, "-docs-words"].concat()
}
|
||||
|
||||
/// Builds the name of the prefix-documents-cache store for the index called `name`.
fn prefix_documents_cache_name(name: &str) -> String {
    ["store-", name, "-prefix-documents-cache"].concat()
}
|
||||
|
||||
/// Builds the name of the prefix-postings-lists-cache store for the index called `name`.
fn prefix_postings_lists_cache_name(name: &str) -> String {
    ["store-", name, "-prefix-postings-lists-cache"].concat()
}
|
||||
|
||||
/// Builds the name of the updates store for the index called `name`.
fn updates_name(name: &str) -> String {
    ["store-", name, "-updates"].concat()
}
|
||||
|
||||
/// Builds the name of the updates-results store for the index called `name`.
fn updates_results_name(name: &str) -> String {
    ["store-", name, "-updates-results"].concat()
}
|
||||
|
||||
/// Builds the name of the facets store for the index called `name`.
fn facets_name(name: &str) -> String {
    ["store-", name, "-facets"].concat()
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Index {
|
||||
pub main: Main,
|
||||
pub postings_lists: PostingsLists,
|
||||
pub documents_fields: DocumentsFields,
|
||||
pub documents_fields_counts: DocumentsFieldsCounts,
|
||||
pub facets: Facets,
|
||||
pub synonyms: Synonyms,
|
||||
pub docs_words: DocsWords,
|
||||
pub prefix_documents_cache: PrefixDocumentsCache,
|
||||
pub prefix_postings_lists_cache: PrefixPostingsListsCache,
|
||||
|
||||
pub updates: Updates,
|
||||
pub updates_results: UpdatesResults,
|
||||
pub(crate) updates_notifier: UpdateEventsEmitter,
|
||||
}
|
||||
|
||||
impl Index {
|
||||
pub fn document<T: de::DeserializeOwned>(
|
||||
&self,
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
attributes: Option<&HashSet<&str>>,
|
||||
document_id: DocumentId,
|
||||
) -> MResult<Option<T>> {
|
||||
let schema = self.main.schema(reader)?;
|
||||
let schema = schema.ok_or(Error::SchemaMissing)?;
|
||||
|
||||
let attributes = match attributes {
|
||||
Some(attributes) => Some(attributes.iter().filter_map(|name| schema.id(*name)).collect()),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let mut deserializer = Deserializer {
|
||||
document_id,
|
||||
reader,
|
||||
documents_fields: self.documents_fields,
|
||||
schema: &schema,
|
||||
fields: attributes.as_ref(),
|
||||
};
|
||||
|
||||
Ok(Option::<T>::deserialize(&mut deserializer)?)
|
||||
}
|
||||
|
||||
pub fn document_attribute<T: de::DeserializeOwned>(
|
||||
&self,
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
attribute: FieldId,
|
||||
) -> MResult<Option<T>> {
|
||||
let bytes = self
|
||||
.documents_fields
|
||||
.document_attribute(reader, document_id, attribute)?;
|
||||
match bytes {
|
||||
Some(bytes) => Ok(Some(serde_json::from_slice(bytes)?)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn document_attribute_bytes<'txn>(
|
||||
&self,
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
attribute: FieldId,
|
||||
) -> MResult<Option<&'txn [u8]>> {
|
||||
let bytes = self
|
||||
.documents_fields
|
||||
.document_attribute(reader, document_id, attribute)?;
|
||||
match bytes {
|
||||
Some(bytes) => Ok(Some(bytes)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn customs_update(&self, writer: &mut heed::RwTxn<UpdateT>, customs: Vec<u8>) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
Ok(update::push_customs_update(writer, self.updates, self.updates_results, customs)?)
|
||||
}
|
||||
|
||||
pub fn settings_update(&self, writer: &mut heed::RwTxn<UpdateT>, update: SettingsUpdate) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
Ok(update::push_settings_update(writer, self.updates, self.updates_results, update)?)
|
||||
}
|
||||
|
||||
pub fn documents_addition<D>(&self) -> update::DocumentsAddition<D> {
|
||||
update::DocumentsAddition::new(
|
||||
self.updates,
|
||||
self.updates_results,
|
||||
self.updates_notifier.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn documents_partial_addition<D>(&self) -> update::DocumentsAddition<D> {
|
||||
update::DocumentsAddition::new_partial(
|
||||
self.updates,
|
||||
self.updates_results,
|
||||
self.updates_notifier.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn documents_deletion(&self) -> update::DocumentsDeletion {
|
||||
update::DocumentsDeletion::new(
|
||||
self.updates,
|
||||
self.updates_results,
|
||||
self.updates_notifier.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn clear_all(&self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
update::push_clear_all(writer, self.updates, self.updates_results)
|
||||
}
|
||||
|
||||
pub fn current_update_id(&self, reader: &heed::RoTxn<UpdateT>) -> MResult<Option<u64>> {
|
||||
match self.updates.last_update(reader)? {
|
||||
Some((id, _)) => Ok(Some(id)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn update_status(
|
||||
&self,
|
||||
reader: &heed::RoTxn<UpdateT>,
|
||||
update_id: u64,
|
||||
) -> MResult<Option<update::UpdateStatus>> {
|
||||
update::update_status(reader, self.updates, self.updates_results, update_id)
|
||||
}
|
||||
|
||||
pub fn all_updates_status(&self, reader: &heed::RoTxn<UpdateT>) -> MResult<Vec<update::UpdateStatus>> {
|
||||
let mut updates = Vec::new();
|
||||
let mut last_update_result_id = 0;
|
||||
|
||||
// retrieve all updates results
|
||||
if let Some((last_id, _)) = self.updates_results.last_update(reader)? {
|
||||
updates.reserve(last_id as usize);
|
||||
|
||||
for id in 0..=last_id {
|
||||
if let Some(update) = self.update_status(reader, id)? {
|
||||
updates.push(update);
|
||||
last_update_result_id = id + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// retrieve all enqueued updates
|
||||
if let Some((last_id, _)) = self.updates.last_update(reader)? {
|
||||
for id in last_update_result_id..=last_id {
|
||||
if let Some(update) = self.update_status(reader, id)? {
|
||||
updates.push(update);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(updates)
|
||||
}
|
||||
|
||||
pub fn query_builder(&self) -> QueryBuilder {
|
||||
QueryBuilder::new(self)
|
||||
}
|
||||
|
||||
pub fn query_builder_with_criteria<'c, 'f, 'd, 'i>(
|
||||
&'i self,
|
||||
criteria: Criteria<'c>,
|
||||
) -> QueryBuilder<'c, 'f, 'd, 'i> {
|
||||
QueryBuilder::with_criteria(self, criteria)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create(
|
||||
env: &heed::Env,
|
||||
update_env: &heed::Env,
|
||||
name: &str,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> MResult<Index> {
|
||||
// create all the store names
|
||||
let main_name = main_name(name);
|
||||
let postings_lists_name = postings_lists_name(name);
|
||||
let documents_fields_name = documents_fields_name(name);
|
||||
let documents_fields_counts_name = documents_fields_counts_name(name);
|
||||
let synonyms_name = synonyms_name(name);
|
||||
let docs_words_name = docs_words_name(name);
|
||||
let prefix_documents_cache_name = prefix_documents_cache_name(name);
|
||||
let prefix_postings_lists_cache_name = prefix_postings_lists_cache_name(name);
|
||||
let updates_name = updates_name(name);
|
||||
let updates_results_name = updates_results_name(name);
|
||||
let facets_name = facets_name(name);
|
||||
|
||||
// open all the stores
|
||||
let main = env.create_poly_database(Some(&main_name))?;
|
||||
let postings_lists = env.create_database(Some(&postings_lists_name))?;
|
||||
let documents_fields = env.create_database(Some(&documents_fields_name))?;
|
||||
let documents_fields_counts = env.create_database(Some(&documents_fields_counts_name))?;
|
||||
let facets = env.create_database(Some(&facets_name))?;
|
||||
let synonyms = env.create_database(Some(&synonyms_name))?;
|
||||
let docs_words = env.create_database(Some(&docs_words_name))?;
|
||||
let prefix_documents_cache = env.create_database(Some(&prefix_documents_cache_name))?;
|
||||
let prefix_postings_lists_cache = env.create_database(Some(&prefix_postings_lists_cache_name))?;
|
||||
let updates = update_env.create_database(Some(&updates_name))?;
|
||||
let updates_results = update_env.create_database(Some(&updates_results_name))?;
|
||||
|
||||
Ok(Index {
|
||||
main: Main { main },
|
||||
postings_lists: PostingsLists { postings_lists },
|
||||
documents_fields: DocumentsFields { documents_fields },
|
||||
documents_fields_counts: DocumentsFieldsCounts { documents_fields_counts },
|
||||
synonyms: Synonyms { synonyms },
|
||||
docs_words: DocsWords { docs_words },
|
||||
prefix_postings_lists_cache: PrefixPostingsListsCache { prefix_postings_lists_cache },
|
||||
prefix_documents_cache: PrefixDocumentsCache { prefix_documents_cache },
|
||||
facets: Facets { facets },
|
||||
|
||||
updates: Updates { updates },
|
||||
updates_results: UpdatesResults { updates_results },
|
||||
updates_notifier,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn open(
|
||||
env: &heed::Env,
|
||||
update_env: &heed::Env,
|
||||
name: &str,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> MResult<Option<Index>> {
|
||||
// create all the store names
|
||||
let main_name = main_name(name);
|
||||
let postings_lists_name = postings_lists_name(name);
|
||||
let documents_fields_name = documents_fields_name(name);
|
||||
let documents_fields_counts_name = documents_fields_counts_name(name);
|
||||
let synonyms_name = synonyms_name(name);
|
||||
let docs_words_name = docs_words_name(name);
|
||||
let prefix_documents_cache_name = prefix_documents_cache_name(name);
|
||||
let facets_name = facets_name(name);
|
||||
let prefix_postings_lists_cache_name = prefix_postings_lists_cache_name(name);
|
||||
let updates_name = updates_name(name);
|
||||
let updates_results_name = updates_results_name(name);
|
||||
|
||||
// open all the stores
|
||||
let main = match env.open_poly_database(Some(&main_name))? {
|
||||
Some(main) => main,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let postings_lists = match env.open_database(Some(&postings_lists_name))? {
|
||||
Some(postings_lists) => postings_lists,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let documents_fields = match env.open_database(Some(&documents_fields_name))? {
|
||||
Some(documents_fields) => documents_fields,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let documents_fields_counts = match env.open_database(Some(&documents_fields_counts_name))? {
|
||||
Some(documents_fields_counts) => documents_fields_counts,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let synonyms = match env.open_database(Some(&synonyms_name))? {
|
||||
Some(synonyms) => synonyms,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let docs_words = match env.open_database(Some(&docs_words_name))? {
|
||||
Some(docs_words) => docs_words,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let prefix_documents_cache = match env.open_database(Some(&prefix_documents_cache_name))? {
|
||||
Some(prefix_documents_cache) => prefix_documents_cache,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let facets = match env.open_database(Some(&facets_name))? {
|
||||
Some(facets) => facets,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let prefix_postings_lists_cache = match env.open_database(Some(&prefix_postings_lists_cache_name))? {
|
||||
Some(prefix_postings_lists_cache) => prefix_postings_lists_cache,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let updates = match update_env.open_database(Some(&updates_name))? {
|
||||
Some(updates) => updates,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let updates_results = match update_env.open_database(Some(&updates_results_name))? {
|
||||
Some(updates_results) => updates_results,
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
Ok(Some(Index {
|
||||
main: Main { main },
|
||||
postings_lists: PostingsLists { postings_lists },
|
||||
documents_fields: DocumentsFields { documents_fields },
|
||||
documents_fields_counts: DocumentsFieldsCounts { documents_fields_counts },
|
||||
synonyms: Synonyms { synonyms },
|
||||
docs_words: DocsWords { docs_words },
|
||||
prefix_documents_cache: PrefixDocumentsCache { prefix_documents_cache },
|
||||
facets: Facets { facets },
|
||||
prefix_postings_lists_cache: PrefixPostingsListsCache { prefix_postings_lists_cache },
|
||||
updates: Updates { updates },
|
||||
updates_results: UpdatesResults { updates_results },
|
||||
updates_notifier,
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn clear(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
update_writer: &mut heed::RwTxn<UpdateT>,
|
||||
index: &Index,
|
||||
) -> MResult<()> {
|
||||
// clear all the stores
|
||||
index.main.clear(writer)?;
|
||||
index.postings_lists.clear(writer)?;
|
||||
index.documents_fields.clear(writer)?;
|
||||
index.documents_fields_counts.clear(writer)?;
|
||||
index.synonyms.clear(writer)?;
|
||||
index.docs_words.clear(writer)?;
|
||||
index.prefix_documents_cache.clear(writer)?;
|
||||
index.prefix_postings_lists_cache.clear(writer)?;
|
||||
index.updates.clear(update_writer)?;
|
||||
index.updates_results.clear(update_writer)?;
|
||||
Ok(())
|
||||
}
|
@ -1,47 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::Result as ZResult;
|
||||
use heed::types::ByteSlice;
|
||||
use sdset::{Set, SetBuf};
|
||||
use slice_group_by::GroupBy;
|
||||
|
||||
use crate::database::MainT;
|
||||
use crate::DocIndex;
|
||||
use crate::store::{Postings, PostingsCodec};
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct PostingsLists {
|
||||
pub(crate) postings_lists: heed::Database<ByteSlice, PostingsCodec>,
|
||||
}
|
||||
|
||||
impl PostingsLists {
|
||||
pub fn put_postings_list(
|
||||
self,
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
word: &[u8],
|
||||
matches: &Set<DocIndex>,
|
||||
) -> ZResult<()> {
|
||||
let docids = matches.linear_group_by_key(|m| m.document_id).map(|g| g[0].document_id).collect();
|
||||
let docids = Cow::Owned(SetBuf::new_unchecked(docids));
|
||||
let matches = Cow::Borrowed(matches);
|
||||
let postings = Postings { docids, matches };
|
||||
|
||||
self.postings_lists.put(writer, word, &postings)
|
||||
}
|
||||
|
||||
pub fn del_postings_list(self, writer: &mut heed::RwTxn<MainT>, word: &[u8]) -> ZResult<bool> {
|
||||
self.postings_lists.delete(writer, word)
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
|
||||
self.postings_lists.clear(writer)
|
||||
}
|
||||
|
||||
pub fn postings_list<'txn>(
|
||||
self,
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
word: &[u8],
|
||||
) -> ZResult<Option<Postings<'txn>>> {
|
||||
self.postings_lists.get(reader, word)
|
||||
}
|
||||
}
|
@ -1,80 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::types::{OwnedType, CowSlice};
|
||||
use heed::Result as ZResult;
|
||||
use zerocopy::{AsBytes, FromBytes};
|
||||
|
||||
use super::{BEU64, BEU32};
|
||||
use crate::{DocumentId, Highlight};
|
||||
use crate::database::MainT;
|
||||
|
||||
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
||||
#[repr(C)]
|
||||
pub struct PrefixKey {
|
||||
prefix: [u8; 4],
|
||||
index: BEU64,
|
||||
docid: BEU32,
|
||||
}
|
||||
|
||||
impl PrefixKey {
|
||||
pub fn new(prefix: [u8; 4], index: u64, docid: u32) -> PrefixKey {
|
||||
PrefixKey {
|
||||
prefix,
|
||||
index: BEU64::new(index),
|
||||
docid: BEU32::new(docid),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct PrefixDocumentsCache {
|
||||
pub(crate) prefix_documents_cache: heed::Database<OwnedType<PrefixKey>, CowSlice<Highlight>>,
|
||||
}
|
||||
|
||||
impl PrefixDocumentsCache {
|
||||
pub fn put_prefix_document(
|
||||
self,
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
prefix: [u8; 4],
|
||||
index: usize,
|
||||
docid: DocumentId,
|
||||
highlights: &[Highlight],
|
||||
) -> ZResult<()> {
|
||||
let key = PrefixKey::new(prefix, index as u64, docid.0);
|
||||
self.prefix_documents_cache.put(writer, &key, highlights)
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
|
||||
self.prefix_documents_cache.clear(writer)
|
||||
}
|
||||
|
||||
pub fn prefix_documents<'txn>(
|
||||
self,
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
prefix: [u8; 4],
|
||||
) -> ZResult<PrefixDocumentsIter<'txn>> {
|
||||
let start = PrefixKey::new(prefix, 0, 0);
|
||||
let end = PrefixKey::new(prefix, u64::max_value(), u32::max_value());
|
||||
let iter = self.prefix_documents_cache.range(reader, &(start..=end))?;
|
||||
Ok(PrefixDocumentsIter { iter })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct PrefixDocumentsIter<'txn> {
|
||||
iter: heed::RoRange<'txn, OwnedType<PrefixKey>, CowSlice<Highlight>>,
|
||||
}
|
||||
|
||||
impl<'txn> Iterator for PrefixDocumentsIter<'txn> {
|
||||
type Item = ZResult<(DocumentId, Cow<'txn, [Highlight]>)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.iter.next() {
|
||||
Some(Ok((key, highlights))) => {
|
||||
let docid = DocumentId(key.docid.get());
|
||||
Some(Ok((docid, highlights)))
|
||||
}
|
||||
Some(Err(e)) => Some(Err(e)),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
@ -1,45 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::Result as ZResult;
|
||||
use heed::types::OwnedType;
|
||||
use sdset::{Set, SetBuf};
|
||||
use slice_group_by::GroupBy;
|
||||
|
||||
use crate::database::MainT;
|
||||
use crate::DocIndex;
|
||||
use crate::store::{PostingsCodec, Postings};
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct PrefixPostingsListsCache {
|
||||
pub(crate) prefix_postings_lists_cache: heed::Database<OwnedType<[u8; 4]>, PostingsCodec>,
|
||||
}
|
||||
|
||||
impl PrefixPostingsListsCache {
|
||||
pub fn put_prefix_postings_list(
|
||||
self,
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
prefix: [u8; 4],
|
||||
matches: &Set<DocIndex>,
|
||||
) -> ZResult<()>
|
||||
{
|
||||
let docids = matches.linear_group_by_key(|m| m.document_id).map(|g| g[0].document_id).collect();
|
||||
let docids = Cow::Owned(SetBuf::new_unchecked(docids));
|
||||
let matches = Cow::Borrowed(matches);
|
||||
let postings = Postings { docids, matches };
|
||||
|
||||
self.prefix_postings_lists_cache.put(writer, &prefix, &postings)
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
|
||||
self.prefix_postings_lists_cache.clear(writer)
|
||||
}
|
||||
|
||||
pub fn prefix_postings_list<'txn>(
|
||||
self,
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
prefix: [u8; 4],
|
||||
) -> ZResult<Option<Postings<'txn>>>
|
||||
{
|
||||
self.prefix_postings_lists_cache.get(reader, &prefix)
|
||||
}
|
||||
}
|
@ -1,44 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::Result as ZResult;
|
||||
use heed::types::ByteSlice;
|
||||
|
||||
use crate::database::MainT;
|
||||
use crate::{FstSetCow, MResult};
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Synonyms {
|
||||
pub(crate) synonyms: heed::Database<ByteSlice, ByteSlice>,
|
||||
}
|
||||
|
||||
impl Synonyms {
|
||||
pub fn put_synonyms<A>(self, writer: &mut heed::RwTxn<MainT>, word: &[u8], synonyms: &fst::Set<A>) -> ZResult<()>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
let bytes = synonyms.as_fst().as_bytes();
|
||||
self.synonyms.put(writer, word, bytes)
|
||||
}
|
||||
|
||||
pub fn del_synonyms(self, writer: &mut heed::RwTxn<MainT>, word: &[u8]) -> ZResult<bool> {
|
||||
self.synonyms.delete(writer, word)
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
|
||||
self.synonyms.clear(writer)
|
||||
}
|
||||
|
||||
pub(crate) fn synonyms_fst<'txn>(self, reader: &'txn heed::RoTxn<MainT>, word: &[u8]) -> ZResult<FstSetCow<'txn>> {
|
||||
match self.synonyms.get(reader, word)? {
|
||||
Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
|
||||
None => Ok(fst::Set::default().map_data(Cow::Owned).unwrap()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn synonyms(self, reader: &heed::RoTxn<MainT>, word: &[u8]) -> MResult<Vec<String>> {
|
||||
let synonyms = self
|
||||
.synonyms_fst(&reader, word)?
|
||||
.stream()
|
||||
.into_strs()?;
|
||||
Ok(synonyms)
|
||||
}
|
||||
}
|
@ -1,65 +0,0 @@
|
||||
use super::BEU64;
|
||||
use crate::database::UpdateT;
|
||||
use crate::update::Update;
|
||||
use heed::types::{OwnedType, SerdeJson};
|
||||
use heed::Result as ZResult;
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Updates {
|
||||
pub(crate) updates: heed::Database<OwnedType<BEU64>, SerdeJson<Update>>,
|
||||
}
|
||||
|
||||
impl Updates {
|
||||
// TODO do not trigger deserialize if possible
|
||||
pub fn last_update(self, reader: &heed::RoTxn<UpdateT>) -> ZResult<Option<(u64, Update)>> {
|
||||
match self.updates.last(reader)? {
|
||||
Some((key, data)) => Ok(Some((key.get(), data))),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
// TODO do not trigger deserialize if possible
|
||||
pub fn first_update(self, reader: &heed::RoTxn<UpdateT>) -> ZResult<Option<(u64, Update)>> {
|
||||
match self.updates.first(reader)? {
|
||||
Some((key, data)) => Ok(Some((key.get(), data))),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
// TODO do not trigger deserialize if possible
|
||||
pub fn get(self, reader: &heed::RoTxn<UpdateT>, update_id: u64) -> ZResult<Option<Update>> {
|
||||
let update_id = BEU64::new(update_id);
|
||||
self.updates.get(reader, &update_id)
|
||||
}
|
||||
|
||||
pub fn put_update(
|
||||
self,
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
update_id: u64,
|
||||
update: &Update,
|
||||
) -> ZResult<()> {
|
||||
// TODO prefer using serde_json?
|
||||
let update_id = BEU64::new(update_id);
|
||||
self.updates.put(writer, &update_id, update)
|
||||
}
|
||||
|
||||
pub fn del_update(self, writer: &mut heed::RwTxn<UpdateT>, update_id: u64) -> ZResult<bool> {
|
||||
let update_id = BEU64::new(update_id);
|
||||
self.updates.delete(writer, &update_id)
|
||||
}
|
||||
|
||||
pub fn pop_front(self, writer: &mut heed::RwTxn<UpdateT>) -> ZResult<Option<(u64, Update)>> {
|
||||
match self.first_update(writer)? {
|
||||
Some((update_id, update)) => {
|
||||
let key = BEU64::new(update_id);
|
||||
self.updates.delete(writer, &key)?;
|
||||
Ok(Some((update_id, update)))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn<UpdateT>) -> ZResult<()> {
|
||||
self.updates.clear(writer)
|
||||
}
|
||||
}
|
@ -1,45 +0,0 @@
|
||||
use super::BEU64;
|
||||
use crate::database::UpdateT;
|
||||
use crate::update::ProcessedUpdateResult;
|
||||
use heed::types::{OwnedType, SerdeJson};
|
||||
use heed::Result as ZResult;
|
||||
|
||||
/// Thin, copyable handle over the heed database holding the results of
/// processed updates.
///
/// Keys are update ids wrapped in `BEU64`; values are `ProcessedUpdateResult`s
/// encoded through `SerdeJson`.
#[derive(Copy, Clone)]
|
||||
pub struct UpdatesResults {
|
||||
// Underlying key/value database: update id -> processed result.
pub(crate) updates_results: heed::Database<OwnedType<BEU64>, SerdeJson<ProcessedUpdateResult>>,
|
||||
}
|
||||
|
||||
impl UpdatesResults {
|
||||
pub fn last_update(
|
||||
self,
|
||||
reader: &heed::RoTxn<UpdateT>,
|
||||
) -> ZResult<Option<(u64, ProcessedUpdateResult)>> {
|
||||
match self.updates_results.last(reader)? {
|
||||
Some((key, data)) => Ok(Some((key.get(), data))),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn put_update_result(
|
||||
self,
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
update_id: u64,
|
||||
update_result: &ProcessedUpdateResult,
|
||||
) -> ZResult<()> {
|
||||
let update_id = BEU64::new(update_id);
|
||||
self.updates_results.put(writer, &update_id, update_result)
|
||||
}
|
||||
|
||||
pub fn update_result(
|
||||
self,
|
||||
reader: &heed::RoTxn<UpdateT>,
|
||||
update_id: u64,
|
||||
) -> ZResult<Option<ProcessedUpdateResult>> {
|
||||
let update_id = BEU64::new(update_id);
|
||||
self.updates_results.get(reader, &update_id)
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn<UpdateT>) -> ZResult<()> {
|
||||
self.updates_results.clear(writer)
|
||||
}
|
||||
}
|
@ -1,36 +0,0 @@
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::update::{next_update_id, Update};
|
||||
use crate::{store, MResult, RankedMap};
|
||||
|
||||
pub fn apply_clear_all(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
) -> MResult<()> {
|
||||
index.main.put_words_fst(writer, &fst::Set::default())?;
|
||||
index.main.put_external_docids(writer, &fst::Map::default())?;
|
||||
index.main.put_internal_docids(writer, &sdset::SetBuf::default())?;
|
||||
index.main.put_ranked_map(writer, &RankedMap::default())?;
|
||||
index.main.put_number_of_documents(writer, |_| 0)?;
|
||||
index.main.put_sorted_document_ids_cache(writer, &[])?;
|
||||
index.documents_fields.clear(writer)?;
|
||||
index.documents_fields_counts.clear(writer)?;
|
||||
index.postings_lists.clear(writer)?;
|
||||
index.docs_words.clear(writer)?;
|
||||
index.prefix_documents_cache.clear(writer)?;
|
||||
index.prefix_postings_lists_cache.clear(writer)?;
|
||||
index.facets.clear(writer)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn push_clear_all(
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
let update = Update::clear_all();
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
}
|
@ -1,26 +0,0 @@
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::{store, MResult};
|
||||
use crate::update::{next_update_id, Update};
|
||||
|
||||
pub fn apply_customs_update(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
main_store: store::Main,
|
||||
customs: &[u8],
|
||||
) -> MResult<()> {
|
||||
main_store.put_customs(writer, customs)
|
||||
}
|
||||
|
||||
pub fn push_customs_update(
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
customs: Vec<u8>,
|
||||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::customs(customs);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
}
|
@ -1,444 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{HashMap, BTreeMap};
|
||||
|
||||
use fst::{set::OpBuilder, SetBuilder};
|
||||
use indexmap::IndexMap;
|
||||
use meilisearch_schema::{Schema, FieldId};
|
||||
use meilisearch_types::DocumentId;
|
||||
use sdset::{duo::Union, SetOperation};
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::facets;
|
||||
use crate::raw_indexer::RawIndexer;
|
||||
use crate::serde::Deserializer;
|
||||
use crate::store::{self, DocumentsFields, DocumentsFieldsCounts, DiscoverIds};
|
||||
use crate::update::helpers::{index_value, value_to_number, extract_document_id};
|
||||
use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update};
|
||||
use crate::{Error, MResult, RankedMap};
|
||||
|
||||
/// Builder that accumulates documents of type `D` and, on `finalize`,
/// stores them as a single documents addition (or partial addition) update.
pub struct DocumentsAddition<D> {
|
||||
// Store the update is written to on `finalize`.
updates_store: store::Updates,
|
||||
// Passed along with `updates_store` to `push_documents_addition`.
updates_results_store: store::UpdatesResults,
|
||||
// Receives `UpdateEvent::NewUpdate` when the builder is finalized.
updates_notifier: UpdateEventsEmitter,
|
||||
// Primary key explicitly set by the user for this update, if any
// (`None` until `set_primary_key` is called).
primary_key: Option<String>,
|
||||
// Documents accumulated so far.
documents: Vec<D>,
|
||||
// `true` when built with `new_partial`, `false` when built with `new`.
is_partial: bool,
|
||||
}
|
||||
|
||||
impl<D> DocumentsAddition<D> {
|
||||
pub fn new(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> DocumentsAddition<D> {
|
||||
DocumentsAddition {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
documents: Vec::new(),
|
||||
is_partial: false,
|
||||
primary_key: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_partial(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> DocumentsAddition<D> {
|
||||
DocumentsAddition {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
documents: Vec::new(),
|
||||
is_partial: true,
|
||||
primary_key: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_primary_key(&mut self, primary_key: String) {
|
||||
self.primary_key = Some(primary_key);
|
||||
}
|
||||
|
||||
pub fn update_document(&mut self, document: D) {
|
||||
self.documents.push(document);
|
||||
}
|
||||
|
||||
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64>
|
||||
where
|
||||
D: serde::Serialize,
|
||||
{
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
let update_id = push_documents_addition(
|
||||
writer,
|
||||
self.updates_store,
|
||||
self.updates_results_store,
|
||||
self.documents,
|
||||
self.is_partial,
|
||||
self.primary_key,
|
||||
)?;
|
||||
Ok(update_id)
|
||||
}
|
||||
}
|
||||
|
||||
impl<D> Extend<D> for DocumentsAddition<D> {
|
||||
fn extend<T: IntoIterator<Item = D>>(&mut self, iter: T) {
|
||||
self.documents.extend(iter)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push_documents_addition<D: serde::Serialize>(
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
addition: Vec<D>,
|
||||
is_partial: bool,
|
||||
primary_key: Option<String>,
|
||||
) -> MResult<u64> {
|
||||
let mut values = Vec::with_capacity(addition.len());
|
||||
for add in addition {
|
||||
let vec = serde_json::to_vec(&add)?;
|
||||
let add = serde_json::from_slice(&vec)?;
|
||||
values.push(add);
|
||||
}
|
||||
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = if is_partial {
|
||||
Update::documents_partial(primary_key, values)
|
||||
} else {
|
||||
Update::documents_addition(primary_key, values)
|
||||
};
|
||||
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn index_document<A: AsRef<[u8]>>(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
documents_fields: DocumentsFields,
|
||||
documents_fields_counts: DocumentsFieldsCounts,
|
||||
ranked_map: &mut RankedMap,
|
||||
indexer: &mut RawIndexer<A>,
|
||||
schema: &Schema,
|
||||
field_id: FieldId,
|
||||
document_id: DocumentId,
|
||||
value: &Value,
|
||||
) -> MResult<()>
|
||||
{
|
||||
let serialized = serde_json::to_vec(value)?;
|
||||
documents_fields.put_document_field(writer, document_id, field_id, &serialized)?;
|
||||
|
||||
if let Some(indexed_pos) = schema.is_searchable(field_id) {
|
||||
let number_of_words = index_value(indexer, document_id, indexed_pos, value);
|
||||
if let Some(number_of_words) = number_of_words {
|
||||
documents_fields_counts.put_document_field_count(
|
||||
writer,
|
||||
document_id,
|
||||
indexed_pos,
|
||||
number_of_words as u16,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
if schema.is_ranked(field_id) {
|
||||
let number = value_to_number(value).unwrap_or_default();
|
||||
ranked_map.insert(document_id, field_id, number);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn apply_addition(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
new_documents: Vec<IndexMap<String, Value>>,
|
||||
partial: bool,
|
||||
primary_key: Option<String>,
|
||||
) -> MResult<()>
|
||||
{
|
||||
let mut schema = match index.main.schema(writer)? {
|
||||
Some(schema) => schema,
|
||||
None => return Err(Error::SchemaMissing),
|
||||
};
|
||||
|
||||
// Retrieve the documents ids related structures
|
||||
let external_docids = index.main.external_docids(writer)?;
|
||||
let internal_docids = index.main.internal_docids(writer)?;
|
||||
let mut available_ids = DiscoverIds::new(&internal_docids);
|
||||
|
||||
let primary_key = match schema.primary_key() {
|
||||
Some(primary_key) => primary_key.to_string(),
|
||||
None => {
|
||||
let name = primary_key.ok_or(Error::MissingPrimaryKey)?;
|
||||
schema.set_primary_key(&name)?;
|
||||
name
|
||||
}
|
||||
};
|
||||
|
||||
// 1. store documents ids for future deletion
|
||||
let mut documents_additions = HashMap::new();
|
||||
let mut new_external_docids = BTreeMap::new();
|
||||
let mut new_internal_docids = Vec::with_capacity(new_documents.len());
|
||||
|
||||
for mut document in new_documents {
|
||||
let external_docids_get = |docid: &str| {
|
||||
match (external_docids.get(docid), new_external_docids.get(docid)) {
|
||||
(_, Some(&id))
|
||||
| (Some(id), _) => Some(id as u32),
|
||||
(None, None) => None,
|
||||
}
|
||||
};
|
||||
|
||||
let (internal_docid, external_docid) =
|
||||
extract_document_id(
|
||||
&primary_key,
|
||||
&document,
|
||||
&external_docids_get,
|
||||
&mut available_ids,
|
||||
)?;
|
||||
|
||||
new_external_docids.insert(external_docid, internal_docid.0 as u64);
|
||||
new_internal_docids.push(internal_docid);
|
||||
|
||||
if partial {
|
||||
let mut deserializer = Deserializer {
|
||||
document_id: internal_docid,
|
||||
reader: writer,
|
||||
documents_fields: index.documents_fields,
|
||||
schema: &schema,
|
||||
fields: None,
|
||||
};
|
||||
|
||||
let old_document = Option::<HashMap<String, Value>>::deserialize(&mut deserializer)?;
|
||||
if let Some(old_document) = old_document {
|
||||
for (key, value) in old_document {
|
||||
document.entry(key).or_insert(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
documents_additions.insert(internal_docid, document);
|
||||
}
|
||||
|
||||
// 2. remove the documents postings lists
|
||||
let number_of_inserted_documents = documents_additions.len();
|
||||
let documents_ids = new_external_docids.iter().map(|(id, _)| id.clone()).collect();
|
||||
apply_documents_deletion(writer, index, documents_ids)?;
|
||||
|
||||
let mut ranked_map = match index.main.ranked_map(writer)? {
|
||||
Some(ranked_map) => ranked_map,
|
||||
None => RankedMap::default(),
|
||||
};
|
||||
|
||||
let stop_words = index.main.stop_words_fst(writer)?.map_data(Cow::into_owned)?;
|
||||
|
||||
|
||||
let mut indexer = RawIndexer::new(&stop_words);
|
||||
|
||||
// For each document in this update
|
||||
for (document_id, document) in &documents_additions {
|
||||
// For each key-value pair in the document.
|
||||
for (attribute, value) in document {
|
||||
let (field_id, _) = schema.insert_with_position(&attribute)?;
|
||||
index_document(
|
||||
writer,
|
||||
index.documents_fields,
|
||||
index.documents_fields_counts,
|
||||
&mut ranked_map,
|
||||
&mut indexer,
|
||||
&schema,
|
||||
field_id,
|
||||
*document_id,
|
||||
&value,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
write_documents_addition_index(
|
||||
writer,
|
||||
index,
|
||||
&ranked_map,
|
||||
number_of_inserted_documents,
|
||||
indexer,
|
||||
)?;
|
||||
|
||||
index.main.put_schema(writer, &schema)?;
|
||||
|
||||
let new_external_docids = fst::Map::from_iter(new_external_docids.iter().map(|(ext, id)| (ext, *id as u64)))?;
|
||||
let new_internal_docids = sdset::SetBuf::from_dirty(new_internal_docids);
|
||||
index.main.merge_external_docids(writer, &new_external_docids)?;
|
||||
index.main.merge_internal_docids(writer, &new_internal_docids)?;
|
||||
|
||||
// recompute all facet attributes after document update.
|
||||
if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
||||
let docids = index.main.internal_docids(writer)?;
|
||||
let facet_map = facets::facet_map_from_docids(writer, index, &docids, attributes_for_facetting.as_ref())?;
|
||||
index.facets.add(writer, facet_map)?;
|
||||
}
|
||||
|
||||
// update is finished; update sorted document id cache with new state
|
||||
let mut document_ids = index.main.internal_docids(writer)?.to_vec();
|
||||
super::cache_document_ids_sorted(writer, &ranked_map, index, &mut document_ids)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn apply_documents_partial_addition(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
new_documents: Vec<IndexMap<String, Value>>,
|
||||
primary_key: Option<String>,
|
||||
) -> MResult<()> {
|
||||
apply_addition(writer, index, new_documents, true, primary_key)
|
||||
}
|
||||
|
||||
pub fn apply_documents_addition(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
new_documents: Vec<IndexMap<String, Value>>,
|
||||
primary_key: Option<String>,
|
||||
) -> MResult<()> {
|
||||
apply_addition(writer, index, new_documents, false, primary_key)
|
||||
}
|
||||
|
||||
pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Index) -> MResult<()> {
|
||||
let schema = match index.main.schema(writer)? {
|
||||
Some(schema) => schema,
|
||||
None => return Err(Error::SchemaMissing),
|
||||
};
|
||||
|
||||
let mut ranked_map = RankedMap::default();
|
||||
|
||||
// 1. retrieve all documents ids
|
||||
let mut documents_ids_to_reindex = Vec::new();
|
||||
for result in index.documents_fields_counts.documents_ids(writer)? {
|
||||
let document_id = result?;
|
||||
documents_ids_to_reindex.push(document_id);
|
||||
}
|
||||
|
||||
// 2. remove the documents posting lists
|
||||
index.main.put_words_fst(writer, &fst::Set::default())?;
|
||||
index.main.put_ranked_map(writer, &ranked_map)?;
|
||||
index.main.put_number_of_documents(writer, |_| 0)?;
|
||||
index.facets.clear(writer)?;
|
||||
index.postings_lists.clear(writer)?;
|
||||
index.docs_words.clear(writer)?;
|
||||
|
||||
let stop_words = index.main
|
||||
.stop_words_fst(writer)?
|
||||
.map_data(Cow::into_owned)
|
||||
.unwrap();
|
||||
|
||||
let number_of_inserted_documents = documents_ids_to_reindex.len();
|
||||
let mut indexer = RawIndexer::new(&stop_words);
|
||||
let mut ram_store = HashMap::new();
|
||||
|
||||
if let Some(ref attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
||||
let facet_map = facets::facet_map_from_docids(writer, &index, &documents_ids_to_reindex, &attributes_for_facetting)?;
|
||||
index.facets.add(writer, facet_map)?;
|
||||
}
|
||||
// ^-- https://github.com/meilisearch/MeiliSearch/pull/631#issuecomment-626624470 --v
|
||||
for document_id in &documents_ids_to_reindex {
|
||||
for result in index.documents_fields.document_fields(writer, *document_id)? {
|
||||
let (field_id, bytes) = result?;
|
||||
let value: Value = serde_json::from_slice(bytes)?;
|
||||
ram_store.insert((document_id, field_id), value);
|
||||
}
|
||||
|
||||
// For each key-value pair in the document.
|
||||
for ((document_id, field_id), value) in ram_store.drain() {
|
||||
index_document(
|
||||
writer,
|
||||
index.documents_fields,
|
||||
index.documents_fields_counts,
|
||||
&mut ranked_map,
|
||||
&mut indexer,
|
||||
&schema,
|
||||
field_id,
|
||||
*document_id,
|
||||
&value,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
// 4. write the new index in the main store
|
||||
write_documents_addition_index(
|
||||
writer,
|
||||
index,
|
||||
&ranked_map,
|
||||
number_of_inserted_documents,
|
||||
indexer,
|
||||
)?;
|
||||
|
||||
index.main.put_schema(writer, &schema)?;
|
||||
|
||||
// recompute all facet attributes after document update.
|
||||
if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
||||
let docids = index.main.internal_docids(writer)?;
|
||||
let facet_map = facets::facet_map_from_docids(writer, index, &docids, attributes_for_facetting.as_ref())?;
|
||||
index.facets.add(writer, facet_map)?;
|
||||
}
|
||||
|
||||
// update is finished; update sorted document id cache with new state
|
||||
let mut document_ids = index.main.internal_docids(writer)?.to_vec();
|
||||
super::cache_document_ids_sorted(writer, &ranked_map, index, &mut document_ids)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_documents_addition_index<A: AsRef<[u8]>>(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
ranked_map: &RankedMap,
|
||||
number_of_inserted_documents: usize,
|
||||
indexer: RawIndexer<A>,
|
||||
) -> MResult<()>
|
||||
{
|
||||
let indexed = indexer.build();
|
||||
let mut delta_words_builder = SetBuilder::memory();
|
||||
|
||||
for (word, delta_set) in indexed.words_doc_indexes {
|
||||
delta_words_builder.insert(&word).unwrap();
|
||||
|
||||
let set = match index.postings_lists.postings_list(writer, &word)? {
|
||||
Some(postings) => Union::new(&postings.matches, &delta_set).into_set_buf(),
|
||||
None => delta_set,
|
||||
};
|
||||
|
||||
index.postings_lists.put_postings_list(writer, &word, &set)?;
|
||||
}
|
||||
|
||||
for (id, words) in indexed.docs_words {
|
||||
index.docs_words.put_doc_words(writer, id, &words)?;
|
||||
}
|
||||
|
||||
let delta_words = delta_words_builder.into_set();
|
||||
|
||||
let words_fst = index.main.words_fst(writer)?;
|
||||
let words = if !words_fst.is_empty() {
|
||||
let op = OpBuilder::new()
|
||||
.add(words_fst.stream())
|
||||
.add(delta_words.stream())
|
||||
.r#union();
|
||||
|
||||
let mut words_builder = SetBuilder::memory();
|
||||
words_builder.extend_stream(op).unwrap();
|
||||
words_builder.into_set()
|
||||
} else {
|
||||
delta_words
|
||||
};
|
||||
|
||||
index.main.put_words_fst(writer, &words)?;
|
||||
index.main.put_ranked_map(writer, ranked_map)?;
|
||||
index.main.put_number_of_documents(writer, |old| old + number_of_inserted_documents as u64)?;
|
||||
|
||||
compute_short_prefixes(writer, &words, index)?;
|
||||
|
||||
Ok(())
|
||||
}
|
@ -1,207 +0,0 @@
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
|
||||
use fst::{SetBuilder, Streamer};
|
||||
use sdset::{duo::DifferenceByKey, SetBuf, SetOperation};
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::facets;
|
||||
use crate::store;
|
||||
use crate::update::{next_update_id, compute_short_prefixes, Update};
|
||||
use crate::{DocumentId, Error, MResult, RankedMap, MainWriter, Index};
|
||||
|
||||
/// Builder that accumulates external document ids and, on `finalize`,
/// stores them as a single documents deletion update.
pub struct DocumentsDeletion {
|
||||
// Store the update is written to on `finalize`.
updates_store: store::Updates,
|
||||
// Passed along with `updates_store` to `push_documents_deletion`.
updates_results_store: store::UpdatesResults,
|
||||
// Receives `UpdateEvent::NewUpdate` when the builder is finalized.
updates_notifier: UpdateEventsEmitter,
|
||||
// External ids of the documents to delete, in insertion order.
external_docids: Vec<String>,
|
||||
}
|
||||
|
||||
impl DocumentsDeletion {
|
||||
pub fn new(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> DocumentsDeletion {
|
||||
DocumentsDeletion {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
external_docids: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete_document_by_external_docid(&mut self, document_id: String) {
|
||||
self.external_docids.push(document_id);
|
||||
}
|
||||
|
||||
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
let update_id = push_documents_deletion(
|
||||
writer,
|
||||
self.updates_store,
|
||||
self.updates_results_store,
|
||||
self.external_docids,
|
||||
)?;
|
||||
Ok(update_id)
|
||||
}
|
||||
}
|
||||
|
||||
impl Extend<String> for DocumentsDeletion {
|
||||
fn extend<T: IntoIterator<Item=String>>(&mut self, iter: T) {
|
||||
self.external_docids.extend(iter)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push_documents_deletion(
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
external_docids: Vec<String>,
|
||||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::documents_deletion(external_docids);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
}
|
||||
|
||||
pub fn apply_documents_deletion(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
external_docids: Vec<String>,
|
||||
) -> MResult<()>
|
||||
{
|
||||
let (external_docids, internal_docids) = {
|
||||
let new_external_docids = SetBuf::from_dirty(external_docids);
|
||||
let mut internal_docids = Vec::new();
|
||||
|
||||
let old_external_docids = index.main.external_docids(writer)?;
|
||||
for external_docid in new_external_docids.as_slice() {
|
||||
if let Some(id) = old_external_docids.get(external_docid) {
|
||||
internal_docids.push(DocumentId(id as u32));
|
||||
}
|
||||
}
|
||||
|
||||
let new_external_docids = fst::Map::from_iter(new_external_docids.into_iter().map(|k| (k, 0))).unwrap();
|
||||
(new_external_docids, SetBuf::from_dirty(internal_docids))
|
||||
};
|
||||
|
||||
let schema = match index.main.schema(writer)? {
|
||||
Some(schema) => schema,
|
||||
None => return Err(Error::SchemaMissing),
|
||||
};
|
||||
|
||||
let mut ranked_map = match index.main.ranked_map(writer)? {
|
||||
Some(ranked_map) => ranked_map,
|
||||
None => RankedMap::default(),
|
||||
};
|
||||
|
||||
// facet filters deletion
|
||||
if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
||||
let facet_map = facets::facet_map_from_docids(writer, &index, &internal_docids, &attributes_for_facetting)?;
|
||||
index.facets.remove(writer, facet_map)?;
|
||||
}
|
||||
|
||||
// collect the ranked attributes according to the schema
|
||||
let ranked_fields = schema.ranked();
|
||||
|
||||
let mut words_document_ids = HashMap::new();
|
||||
for id in internal_docids.iter().cloned() {
|
||||
// remove all the ranked attributes from the ranked_map
|
||||
for ranked_attr in ranked_fields {
|
||||
ranked_map.remove(id, *ranked_attr);
|
||||
}
|
||||
|
||||
let words = index.docs_words.doc_words(writer, id)?;
|
||||
if !words.is_empty() {
|
||||
let mut stream = words.stream();
|
||||
while let Some(word) = stream.next() {
|
||||
let word = word.to_vec();
|
||||
words_document_ids
|
||||
.entry(word)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut deleted_documents = HashSet::new();
|
||||
let mut removed_words = BTreeSet::new();
|
||||
for (word, document_ids) in words_document_ids {
|
||||
let document_ids = SetBuf::from_dirty(document_ids);
|
||||
|
||||
if let Some(postings) = index.postings_lists.postings_list(writer, &word)? {
|
||||
let op = DifferenceByKey::new(&postings.matches, &document_ids, |d| d.document_id, |id| *id);
|
||||
let doc_indexes = op.into_set_buf();
|
||||
|
||||
if !doc_indexes.is_empty() {
|
||||
index.postings_lists.put_postings_list(writer, &word, &doc_indexes)?;
|
||||
} else {
|
||||
index.postings_lists.del_postings_list(writer, &word)?;
|
||||
removed_words.insert(word);
|
||||
}
|
||||
}
|
||||
|
||||
for id in document_ids {
|
||||
index.documents_fields_counts.del_all_document_fields_counts(writer, id)?;
|
||||
if index.documents_fields.del_all_document_fields(writer, id)? != 0 {
|
||||
deleted_documents.insert(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let deleted_documents_len = deleted_documents.len() as u64;
|
||||
for id in &deleted_documents {
|
||||
index.docs_words.del_doc_words(writer, *id)?;
|
||||
}
|
||||
|
||||
let removed_words = fst::Set::from_iter(removed_words).unwrap();
|
||||
let words = {
|
||||
let words_set = index.main.words_fst(writer)?;
|
||||
let op = fst::set::OpBuilder::new()
|
||||
.add(words_set.stream())
|
||||
.add(removed_words.stream())
|
||||
.difference();
|
||||
|
||||
let mut words_builder = SetBuilder::memory();
|
||||
words_builder.extend_stream(op).unwrap();
|
||||
words_builder.into_set()
|
||||
};
|
||||
|
||||
index.main.put_words_fst(writer, &words)?;
|
||||
index.main.put_ranked_map(writer, &ranked_map)?;
|
||||
index.main.put_number_of_documents(writer, |old| old - deleted_documents_len)?;
|
||||
|
||||
// We apply the changes to the user and internal ids
|
||||
index.main.remove_external_docids(writer, &external_docids)?;
|
||||
index.main.remove_internal_docids(writer, &internal_docids)?;
|
||||
|
||||
compute_short_prefixes(writer, &words, index)?;
|
||||
|
||||
// update is finished; update sorted document id cache with new state
|
||||
document_cache_remove_deleted(writer, index, &ranked_map, &deleted_documents)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// rebuilds the document id cache by either removing deleted documents from the existing cache,
|
||||
/// and generating a new one from docs in store
|
||||
fn document_cache_remove_deleted(writer: &mut MainWriter, index: &Index, ranked_map: &RankedMap, documents_to_delete: &HashSet<DocumentId>) -> MResult<()> {
|
||||
let new_cache = match index.main.sorted_document_ids_cache(writer)? {
|
||||
// only keep documents that are not in the list of deleted documents. Order is preserved,
|
||||
// no need to resort
|
||||
Some(old_cache) => {
|
||||
old_cache.iter().filter(|docid| !documents_to_delete.contains(docid)).cloned().collect::<Vec<_>>()
|
||||
}
|
||||
// couldn't find cached documents, try building a new cache from documents in store
|
||||
None => {
|
||||
let mut document_ids = index.main.internal_docids(writer)?.to_vec();
|
||||
super::cache_document_ids_sorted(writer, ranked_map, index, &mut document_ids)?;
|
||||
document_ids
|
||||
}
|
||||
};
|
||||
index.main.put_sorted_document_ids_cache(writer, &new_cache)?;
|
||||
Ok(())
|
||||
}
|
@ -1,142 +0,0 @@
|
||||
use std::fmt::Write as _;
|
||||
|
||||
use indexmap::IndexMap;
|
||||
use meilisearch_schema::IndexedPos;
|
||||
use meilisearch_types::DocumentId;
|
||||
use ordered_float::OrderedFloat;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::Number;
|
||||
use crate::raw_indexer::RawIndexer;
|
||||
use crate::serde::SerializerError;
|
||||
use crate::store::DiscoverIds;
|
||||
|
||||
/// Returns the number of words indexed or `None` if the type is unindexable.
|
||||
pub fn index_value<A: AsRef<[u8]>>(
|
||||
indexer: &mut RawIndexer<A>,
|
||||
document_id: DocumentId,
|
||||
indexed_pos: IndexedPos,
|
||||
value: &Value,
|
||||
) -> Option<usize>
|
||||
{
|
||||
match value {
|
||||
Value::Null => None,
|
||||
Value::Bool(boolean) => {
|
||||
let text = boolean.to_string();
|
||||
let number_of_words = indexer.index_text(document_id, indexed_pos, &text);
|
||||
Some(number_of_words)
|
||||
},
|
||||
Value::Number(number) => {
|
||||
let text = number.to_string();
|
||||
Some(indexer.index_text(document_id, indexed_pos, &text))
|
||||
},
|
||||
Value::String(string) => {
|
||||
Some(indexer.index_text(document_id, indexed_pos, &string))
|
||||
},
|
||||
Value::Array(_) => {
|
||||
let text = value_to_string(value);
|
||||
Some(indexer.index_text(document_id, indexed_pos, &text))
|
||||
},
|
||||
Value::Object(_) => {
|
||||
let text = value_to_string(value);
|
||||
Some(indexer.index_text(document_id, indexed_pos, &text))
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Transforms the JSON Value type into a String.
|
||||
pub fn value_to_string(value: &Value) -> String {
|
||||
fn internal_value_to_string(string: &mut String, value: &Value) {
|
||||
match value {
|
||||
Value::Null => (),
|
||||
Value::Bool(boolean) => { let _ = write!(string, "{}", &boolean); },
|
||||
Value::Number(number) => { let _ = write!(string, "{}", &number); },
|
||||
Value::String(text) => string.push_str(&text),
|
||||
Value::Array(array) => {
|
||||
for value in array {
|
||||
internal_value_to_string(string, value);
|
||||
let _ = string.write_str(". ");
|
||||
}
|
||||
},
|
||||
Value::Object(object) => {
|
||||
for (key, value) in object {
|
||||
string.push_str(key);
|
||||
let _ = string.write_str(". ");
|
||||
internal_value_to_string(string, value);
|
||||
let _ = string.write_str(". ");
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
let mut string = String::new();
|
||||
internal_value_to_string(&mut string, value);
|
||||
string
|
||||
}
|
||||
|
||||
/// Transforms the JSON Value type into a Number.
|
||||
pub fn value_to_number(value: &Value) -> Option<Number> {
|
||||
use std::str::FromStr;
|
||||
|
||||
match value {
|
||||
Value::Null => None,
|
||||
Value::Bool(boolean) => Some(Number::Unsigned(*boolean as u64)),
|
||||
Value::Number(number) => {
|
||||
match (number.as_i64(), number.as_u64(), number.as_f64()) {
|
||||
(Some(n), _, _) => Some(Number::Signed(n)),
|
||||
(_, Some(n), _) => Some(Number::Unsigned(n)),
|
||||
(_, _, Some(n)) => Some(Number::Float(OrderedFloat(n))),
|
||||
(None, None, None) => None,
|
||||
}
|
||||
},
|
||||
Value::String(string) => Number::from_str(string).ok(),
|
||||
Value::Array(_array) => None,
|
||||
Value::Object(_object) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Validates a string representation to be a correct document id and returns
|
||||
/// the corresponding id or generate a new one, this is the way we produce documents ids.
|
||||
pub fn discover_document_id<F>(
|
||||
docid: &str,
|
||||
external_docids_get: F,
|
||||
available_docids: &mut DiscoverIds<'_>,
|
||||
) -> Result<DocumentId, SerializerError>
|
||||
where
|
||||
F: FnOnce(&str) -> Option<u32>
|
||||
{
|
||||
if docid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
|
||||
match external_docids_get(docid) {
|
||||
Some(id) => Ok(DocumentId(id)),
|
||||
None => {
|
||||
let internal_id = available_docids.next().expect("no more ids available");
|
||||
Ok(internal_id)
|
||||
},
|
||||
}
|
||||
} else {
|
||||
Err(SerializerError::InvalidDocumentIdFormat)
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts and validates the document id of a document.
|
||||
pub fn extract_document_id<F>(
|
||||
primary_key: &str,
|
||||
document: &IndexMap<String, Value>,
|
||||
external_docids_get: F,
|
||||
available_docids: &mut DiscoverIds<'_>,
|
||||
) -> Result<(DocumentId, String), SerializerError>
|
||||
where
|
||||
F: FnOnce(&str) -> Option<u32>
|
||||
{
|
||||
match document.get(primary_key) {
|
||||
Some(value) => {
|
||||
let docid = match value {
|
||||
Value::Number(number) => number.to_string(),
|
||||
Value::String(string) => string.clone(),
|
||||
_ => return Err(SerializerError::InvalidDocumentIdFormat),
|
||||
};
|
||||
discover_document_id(&docid, external_docids_get, available_docids).map(|id| (id, docid))
|
||||
}
|
||||
None => Err(SerializerError::DocumentIdNotFound),
|
||||
}
|
||||
}
|
@ -1,391 +0,0 @@
|
||||
mod clear_all;
|
||||
mod customs_update;
|
||||
mod documents_addition;
|
||||
mod documents_deletion;
|
||||
mod settings_update;
|
||||
mod helpers;
|
||||
|
||||
pub use self::clear_all::{apply_clear_all, push_clear_all};
|
||||
pub use self::customs_update::{apply_customs_update, push_customs_update};
|
||||
pub use self::documents_addition::{apply_documents_addition, apply_documents_partial_addition, DocumentsAddition};
|
||||
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
|
||||
pub use self::helpers::{index_value, value_to_string, value_to_number, discover_document_id, extract_document_id};
|
||||
pub use self::settings_update::{apply_settings_update, push_settings_update};
|
||||
|
||||
use std::cmp;
|
||||
use std::time::Instant;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use heed::Result as ZResult;
|
||||
use indexmap::IndexMap;
|
||||
use log::debug;
|
||||
use sdset::Set;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
|
||||
use meilisearch_error::ErrorCode;
|
||||
use meilisearch_types::DocumentId;
|
||||
|
||||
use crate::{store, MResult, RankedMap};
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::settings::SettingsUpdate;
|
||||
|
||||
/// An update operation together with the moment it was enqueued.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Update {
    /// The operation to perform and its payload.
    data: UpdateData,
    /// UTC timestamp taken when the update was created.
    enqueued_at: DateTime<Utc>,
}
|
||||
|
||||
impl Update {
|
||||
fn clear_all() -> Update {
|
||||
Update {
|
||||
data: UpdateData::ClearAll,
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn customs(data: Vec<u8>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::Customs(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn documents_addition(primary_key: Option<String>, documents: Vec<IndexMap<String, Value>>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::DocumentsAddition{ documents, primary_key },
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn documents_partial(primary_key: Option<String>, documents: Vec<IndexMap<String, Value>>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::DocumentsPartial{ documents, primary_key },
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn documents_deletion(data: Vec<String>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::DocumentsDeletion(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn settings(data: SettingsUpdate) -> Update {
|
||||
Update {
|
||||
data: UpdateData::Settings(Box::new(data)),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The payload of an update, one variant per kind of operation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateData {
    /// Clear the index.
    ClearAll,
    /// Opaque custom bytes.
    Customs(Vec<u8>),
    // (primary key, documents)
    /// Add the given documents, with an optional primary key name.
    DocumentsAddition {
        primary_key: Option<String>,
        documents: Vec<IndexMap<String, Value>>
    },
    /// Partially add the given documents, with an optional primary key name.
    DocumentsPartial {
        primary_key: Option<String>,
        documents: Vec<IndexMap<String, Value>>,
    },
    /// Delete the documents designated by these identifiers.
    DocumentsDeletion(Vec<String>),
    /// Apply a settings update (boxed).
    Settings(Box<SettingsUpdate>)
}
|
||||
|
||||
impl UpdateData {
|
||||
pub fn update_type(&self) -> UpdateType {
|
||||
match self {
|
||||
UpdateData::ClearAll => UpdateType::ClearAll,
|
||||
UpdateData::Customs(_) => UpdateType::Customs,
|
||||
UpdateData::DocumentsAddition{ documents, .. } => UpdateType::DocumentsAddition {
|
||||
number: documents.len(),
|
||||
},
|
||||
UpdateData::DocumentsPartial{ documents, .. } => UpdateType::DocumentsPartial {
|
||||
number: documents.len(),
|
||||
},
|
||||
UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion {
|
||||
number: deletion.len(),
|
||||
},
|
||||
UpdateData::Settings(update) => UpdateType::Settings {
|
||||
settings: update.clone(),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lightweight description of an update: its kind plus, for document
/// operations, the number of elements involved.
///
/// Serialized as an internally tagged enum whose tag field is `name`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "name")]
pub enum UpdateType {
    ClearAll,
    Customs,
    /// `number` is the count of documents in the addition payload.
    DocumentsAddition { number: usize },
    /// `number` is the count of documents in the partial-addition payload.
    DocumentsPartial { number: usize },
    /// `number` is the count of identifiers in the deletion payload.
    DocumentsDeletion { number: usize },
    /// The settings that the update carries.
    Settings { settings: Box<SettingsUpdate> },
}
|
||||
|
||||
/// Outcome of an update that has been processed, successfully or not.
///
/// Field names are serialized in camelCase; the four `error*` fields are
/// omitted from the serialized form when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ProcessedUpdateResult {
    /// Identifier of the update.
    pub update_id: u64,
    /// Kind of update; serialized under the key `type`.
    #[serde(rename = "type")]
    pub update_type: UpdateType,
    /// Error message, present only when the update failed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    /// Error type, present only when the update failed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error_type: Option<String>,
    /// Error code, present only when the update failed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error_code: Option<String>,
    /// Error documentation link, present only when the update failed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error_link: Option<String>,
    pub duration: f64, // in seconds
    /// When the update was enqueued.
    pub enqueued_at: DateTime<Utc>,
    /// When the processing of the update finished.
    pub processed_at: DateTime<Utc>,
}
|
||||
|
||||
/// Description of an update that is still waiting to be processed.
///
/// Field names are serialized in camelCase.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EnqueuedUpdateResult {
    /// Identifier of the update.
    pub update_id: u64,
    /// Kind of update; serialized under the key `type`.
    #[serde(rename = "type")]
    pub update_type: UpdateType,
    /// When the update was enqueued.
    pub enqueued_at: DateTime<Utc>,
}
|
||||
|
||||
/// Current status of an update.
///
/// Serialized as an internally tagged enum: the camelCase variant name is
/// stored in a `status` field and the variant content is flattened next to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", tag = "status")]
pub enum UpdateStatus {
    /// The update has not been processed yet.
    Enqueued {
        #[serde(flatten)]
        content: EnqueuedUpdateResult,
    },
    /// The update was processed and produced an error.
    Failed {
        #[serde(flatten)]
        content: ProcessedUpdateResult,
    },
    /// The update was processed without error.
    Processed {
        #[serde(flatten)]
        content: ProcessedUpdateResult,
    },
}
|
||||
|
||||
pub fn update_status(
|
||||
update_reader: &heed::RoTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
update_id: u64,
|
||||
) -> MResult<Option<UpdateStatus>> {
|
||||
match updates_results_store.update_result(update_reader, update_id)? {
|
||||
Some(result) => {
|
||||
if result.error.is_some() {
|
||||
Ok(Some(UpdateStatus::Failed { content: result }))
|
||||
} else {
|
||||
Ok(Some(UpdateStatus::Processed { content: result }))
|
||||
}
|
||||
},
|
||||
None => match updates_store.get(update_reader, update_id)? {
|
||||
Some(update) => Ok(Some(UpdateStatus::Enqueued {
|
||||
content: EnqueuedUpdateResult {
|
||||
update_id,
|
||||
update_type: update.data.update_type(),
|
||||
enqueued_at: update.enqueued_at,
|
||||
},
|
||||
})),
|
||||
None => Ok(None),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_update_id(
|
||||
update_writer: &mut heed::RwTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
) -> ZResult<u64> {
|
||||
let last_update = updates_store.last_update(update_writer)?;
|
||||
let last_update = last_update.map(|(n, _)| n);
|
||||
|
||||
let last_update_results_id = updates_results_store.last_update(update_writer)?;
|
||||
let last_update_results_id = last_update_results_id.map(|(n, _)| n);
|
||||
|
||||
let max_update_id = cmp::max(last_update, last_update_results_id);
|
||||
let new_update_id = max_update_id.map_or(0, |n| n + 1);
|
||||
|
||||
Ok(new_update_id)
|
||||
}
|
||||
|
||||
pub fn update_task(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
update_id: u64,
|
||||
update: Update,
|
||||
) -> MResult<ProcessedUpdateResult> {
|
||||
debug!("Processing update number {}", update_id);
|
||||
|
||||
let Update { enqueued_at, data } = update;
|
||||
|
||||
let (update_type, result, duration) = match data {
|
||||
UpdateData::ClearAll => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::ClearAll;
|
||||
let result = apply_clear_all(writer, index);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::Customs(customs) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::Customs;
|
||||
let result = apply_customs_update(writer, index.main, &customs).map_err(Into::into);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::DocumentsAddition { documents, primary_key } => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::DocumentsAddition {
|
||||
number: documents.len(),
|
||||
};
|
||||
|
||||
let result = apply_documents_addition(writer, index, documents, primary_key);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::DocumentsPartial{ documents, primary_key } => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::DocumentsPartial {
|
||||
number: documents.len(),
|
||||
};
|
||||
|
||||
let result = apply_documents_partial_addition(writer, index, documents, primary_key);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::DocumentsDeletion(documents) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::DocumentsDeletion {
|
||||
number: documents.len(),
|
||||
};
|
||||
|
||||
let result = apply_documents_deletion(writer, index, documents);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::Settings(settings) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::Settings {
|
||||
settings: settings.clone(),
|
||||
};
|
||||
|
||||
let result = apply_settings_update(
|
||||
writer,
|
||||
index,
|
||||
*settings,
|
||||
);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
};
|
||||
|
||||
debug!(
|
||||
"Processed update number {} {:?} {:?}",
|
||||
update_id, update_type, result
|
||||
);
|
||||
|
||||
let status = ProcessedUpdateResult {
|
||||
update_id,
|
||||
update_type,
|
||||
error: result.as_ref().map_err(|e| e.to_string()).err(),
|
||||
error_code: result.as_ref().map_err(|e| e.error_name()).err(),
|
||||
error_type: result.as_ref().map_err(|e| e.error_type()).err(),
|
||||
error_link: result.as_ref().map_err(|e| e.error_url()).err(),
|
||||
duration: duration.as_secs_f64(),
|
||||
enqueued_at,
|
||||
processed_at: Utc::now(),
|
||||
};
|
||||
|
||||
Ok(status)
|
||||
}
|
||||
|
||||
/// Rebuilds the prefix postings lists cache for prefixes of 1 and 2 bytes.
///
/// The cache is cleared, then for each prefix length the words of
/// `words_fst` are streamed; the postings of every word strictly longer than
/// the prefix are accumulated under that prefix, and the merged (sorted and
/// deduplicated) list is written to the cache whenever the prefix changes.
fn compute_short_prefixes<A>(
    writer: &mut heed::RwTxn<MainT>,
    words_fst: &fst::Set<A>,
    index: &store::Index,
) -> MResult<()>
where A: AsRef<[u8]>,
{
    // clear the prefixes
    let pplc_store = index.prefix_postings_lists_cache;
    pplc_store.clear(writer)?;

    for prefix_len in 1..=2 {
        // compute prefixes and store those in the PrefixPostingsListsCache store.
        // `previous_prefix` holds the prefix being accumulated (zero-padded to
        // 4 bytes) and the postings gathered for it so far.
        let mut previous_prefix: Option<([u8; 4], Vec<_>)> = None;
        let mut stream = words_fst.into_stream();
        while let Some(input) = stream.next() {

            // We skip the prefixes that are shorter than the current length
            // we want to cache (<). We must ignore the input when it is exactly the
            // same word as the prefix because if we match exactly on it we need
            // to consider it as an exact match and not as a prefix (=).
            if input.len() <= prefix_len { continue }

            if let Some(postings_list) = index.postings_lists.postings_list(writer, input)?.map(|p| p.matches.into_owned()) {
                // the prefix is the first `prefix_len` bytes of the word
                let prefix = &input[..prefix_len];

                let mut arr_prefix = [0; 4];
                arr_prefix[..prefix_len].copy_from_slice(prefix);

                match previous_prefix {
                    // the prefix changed: flush the accumulated postings of the
                    // previous prefix, then restart accumulation with this word
                    Some((ref mut prev_prefix, ref mut prev_pl)) if *prev_prefix != arr_prefix => {
                        prev_pl.sort_unstable();
                        prev_pl.dedup();

                        if let Ok(prefix) = std::str::from_utf8(&prev_prefix[..prefix_len]) {
                            debug!("writing the prefix of {:?} of length {}", prefix, prev_pl.len());
                        }

                        // sorted and deduplicated just above
                        let pls = Set::new_unchecked(&prev_pl);
                        pplc_store.put_prefix_postings_list(writer, *prev_prefix, &pls)?;

                        *prev_prefix = arr_prefix;
                        prev_pl.clear();
                        prev_pl.extend_from_slice(&postings_list);
                    },
                    // same prefix as the previous word: keep accumulating
                    Some((_, ref mut prev_pl)) => prev_pl.extend_from_slice(&postings_list),
                    // first word seen for this prefix length
                    None => previous_prefix = Some((arr_prefix, postings_list.to_vec())),
                }
            }
        }

        // write the last prefix postings lists
        if let Some((prev_prefix, mut prev_pl)) = previous_prefix.take() {
            prev_pl.sort_unstable();
            prev_pl.dedup();

            let pls = Set::new_unchecked(&prev_pl);
            pplc_store.put_prefix_postings_list(writer, prev_prefix, &pls)?;
        }
    }

    Ok(())
}
|
||||
|
||||
fn cache_document_ids_sorted(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
ranked_map: &RankedMap,
|
||||
index: &store::Index,
|
||||
document_ids: &mut [DocumentId],
|
||||
) -> MResult<()> {
|
||||
crate::bucket_sort::placeholder_document_sort(document_ids, index, writer, ranked_map)?;
|
||||
index.main.put_sorted_document_ids_cache(writer, &document_ids)
|
||||
}
|
@ -1,332 +0,0 @@
|
||||
use std::{borrow::Cow, collections::{BTreeMap, BTreeSet}};
|
||||
|
||||
use heed::Result as ZResult;
|
||||
use fst::{SetBuilder, set::OpBuilder};
|
||||
use sdset::SetBuf;
|
||||
use meilisearch_schema::Schema;
|
||||
use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig};
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::settings::{UpdateState, SettingsUpdate, RankingRule};
|
||||
use crate::update::documents_addition::reindex_all_documents;
|
||||
use crate::update::{next_update_id, Update};
|
||||
use crate::{store, MResult, Error};
|
||||
|
||||
pub fn push_settings_update(
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
settings: SettingsUpdate,
|
||||
) -> ZResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::settings(settings);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
}
|
||||
|
||||
pub fn apply_settings_update(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
settings: SettingsUpdate,
|
||||
) -> MResult<()> {
|
||||
let mut must_reindex = false;
|
||||
|
||||
let mut schema = match index.main.schema(writer)? {
|
||||
Some(schema) => schema,
|
||||
None => {
|
||||
match settings.primary_key.clone() {
|
||||
UpdateState::Update(id) => Schema::with_primary_key(&id),
|
||||
_ => return Err(Error::MissingPrimaryKey)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
match settings.ranking_rules {
|
||||
UpdateState::Update(v) => {
|
||||
let ranked_field: Vec<&str> = v.iter().filter_map(RankingRule::field).collect();
|
||||
schema.update_ranked(&ranked_field)?;
|
||||
index.main.put_ranking_rules(writer, &v)?;
|
||||
must_reindex = true;
|
||||
},
|
||||
UpdateState::Clear => {
|
||||
index.main.delete_ranking_rules(writer)?;
|
||||
schema.clear_ranked();
|
||||
must_reindex = true;
|
||||
},
|
||||
UpdateState::Nothing => (),
|
||||
}
|
||||
|
||||
match settings.distinct_attribute {
|
||||
UpdateState::Update(v) => {
|
||||
let field_id = schema.insert(&v)?;
|
||||
index.main.put_distinct_attribute(writer, field_id)?;
|
||||
},
|
||||
UpdateState::Clear => {
|
||||
index.main.delete_distinct_attribute(writer)?;
|
||||
},
|
||||
UpdateState::Nothing => (),
|
||||
}
|
||||
|
||||
match settings.searchable_attributes.clone() {
|
||||
UpdateState::Update(v) => {
|
||||
if v.iter().any(|e| e == "*") || v.is_empty() {
|
||||
schema.set_all_searchable();
|
||||
} else {
|
||||
schema.update_searchable(v)?;
|
||||
}
|
||||
must_reindex = true;
|
||||
},
|
||||
UpdateState::Clear => {
|
||||
schema.set_all_searchable();
|
||||
must_reindex = true;
|
||||
},
|
||||
UpdateState::Nothing => (),
|
||||
}
|
||||
match settings.displayed_attributes.clone() {
|
||||
UpdateState::Update(v) => {
|
||||
if v.contains("*") || v.is_empty() {
|
||||
schema.set_all_displayed();
|
||||
} else {
|
||||
schema.update_displayed(v)?
|
||||
}
|
||||
},
|
||||
UpdateState::Clear => {
|
||||
schema.set_all_displayed();
|
||||
},
|
||||
UpdateState::Nothing => (),
|
||||
}
|
||||
|
||||
match settings.attributes_for_faceting {
|
||||
UpdateState::Update(attrs) => {
|
||||
apply_attributes_for_faceting_update(writer, index, &mut schema, &attrs)?;
|
||||
must_reindex = true;
|
||||
},
|
||||
UpdateState::Clear => {
|
||||
index.main.delete_attributes_for_faceting(writer)?;
|
||||
index.facets.clear(writer)?;
|
||||
},
|
||||
UpdateState::Nothing => (),
|
||||
}
|
||||
|
||||
index.main.put_schema(writer, &schema)?;
|
||||
|
||||
match settings.stop_words {
|
||||
UpdateState::Update(stop_words) => {
|
||||
if apply_stop_words_update(writer, index, stop_words)? {
|
||||
must_reindex = true;
|
||||
}
|
||||
},
|
||||
UpdateState::Clear => {
|
||||
if apply_stop_words_update(writer, index, BTreeSet::new())? {
|
||||
must_reindex = true;
|
||||
}
|
||||
},
|
||||
UpdateState::Nothing => (),
|
||||
}
|
||||
|
||||
match settings.synonyms {
|
||||
UpdateState::Update(synonyms) => apply_synonyms_update(writer, index, synonyms)?,
|
||||
UpdateState::Clear => apply_synonyms_update(writer, index, BTreeMap::new())?,
|
||||
UpdateState::Nothing => (),
|
||||
}
|
||||
|
||||
if must_reindex {
|
||||
reindex_all_documents(writer, index)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn apply_attributes_for_faceting_update(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
schema: &mut Schema,
|
||||
attributes: &[String]
|
||||
) -> MResult<()> {
|
||||
let mut attribute_ids = Vec::new();
|
||||
for name in attributes {
|
||||
attribute_ids.push(schema.insert(name)?);
|
||||
}
|
||||
let attributes_for_faceting = SetBuf::from_dirty(attribute_ids);
|
||||
index.main.put_attributes_for_faceting(writer, &attributes_for_faceting)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn apply_stop_words_update(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
stop_words: BTreeSet<String>,
|
||||
) -> MResult<bool>
|
||||
{
|
||||
let mut must_reindex = false;
|
||||
|
||||
let old_stop_words: BTreeSet<String> = index.main
|
||||
.stop_words_fst(writer)?
|
||||
.stream()
|
||||
.into_strs()?
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let deletion: BTreeSet<String> = old_stop_words.difference(&stop_words).cloned().collect();
|
||||
let addition: BTreeSet<String> = stop_words.difference(&old_stop_words).cloned().collect();
|
||||
|
||||
if !addition.is_empty() {
|
||||
apply_stop_words_addition(writer, index, addition)?;
|
||||
}
|
||||
|
||||
if !deletion.is_empty() {
|
||||
must_reindex = true;
|
||||
apply_stop_words_deletion(writer, index, deletion)?;
|
||||
}
|
||||
|
||||
let words_fst = index.main.words_fst(writer)?;
|
||||
if !words_fst.is_empty() {
|
||||
let stop_words = fst::Set::from_iter(stop_words)?;
|
||||
let op = OpBuilder::new()
|
||||
.add(&words_fst)
|
||||
.add(&stop_words)
|
||||
.difference();
|
||||
|
||||
let mut builder = fst::SetBuilder::memory();
|
||||
builder.extend_stream(op)?;
|
||||
let words_fst = builder.into_set();
|
||||
|
||||
index.main.put_words_fst(writer, &words_fst)?;
|
||||
index.main.put_stop_words_fst(writer, &stop_words)?;
|
||||
}
|
||||
|
||||
Ok(must_reindex)
|
||||
}
|
||||
|
||||
fn apply_stop_words_addition(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
addition: BTreeSet<String>,
|
||||
) -> MResult<()>
|
||||
{
|
||||
let main_store = index.main;
|
||||
let postings_lists_store = index.postings_lists;
|
||||
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
|
||||
for word in addition {
|
||||
stop_words_builder.insert(&word)?;
|
||||
// we remove every posting list associated to a new stop word
|
||||
postings_lists_store.del_postings_list(writer, word.as_bytes())?;
|
||||
}
|
||||
|
||||
// create the new delta stop words fst
|
||||
let delta_stop_words = stop_words_builder.into_set();
|
||||
|
||||
// we also need to remove all the stop words from the main fst
|
||||
let words_fst = main_store.words_fst(writer)?;
|
||||
if !words_fst.is_empty() {
|
||||
let op = OpBuilder::new()
|
||||
.add(&words_fst)
|
||||
.add(&delta_stop_words)
|
||||
.difference();
|
||||
|
||||
let mut word_fst_builder = SetBuilder::memory();
|
||||
word_fst_builder.extend_stream(op)?;
|
||||
let word_fst = word_fst_builder.into_set();
|
||||
|
||||
main_store.put_words_fst(writer, &word_fst)?;
|
||||
}
|
||||
|
||||
// now we add all of these stop words from the main store
|
||||
let stop_words_fst = main_store.stop_words_fst(writer)?;
|
||||
|
||||
let op = OpBuilder::new()
|
||||
.add(&stop_words_fst)
|
||||
.add(&delta_stop_words)
|
||||
.r#union();
|
||||
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
stop_words_builder.extend_stream(op)?;
|
||||
let stop_words_fst = stop_words_builder.into_set();
|
||||
|
||||
main_store.put_stop_words_fst(writer, &stop_words_fst)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn apply_stop_words_deletion(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
deletion: BTreeSet<String>,
|
||||
) -> MResult<()> {
|
||||
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
|
||||
for word in deletion {
|
||||
stop_words_builder.insert(&word)?;
|
||||
}
|
||||
|
||||
// create the new delta stop words fst
|
||||
let delta_stop_words = stop_words_builder.into_set();
|
||||
|
||||
// now we delete all of these stop words from the main store
|
||||
let stop_words_fst = index.main.stop_words_fst(writer)?;
|
||||
|
||||
let op = OpBuilder::new()
|
||||
.add(&stop_words_fst)
|
||||
.add(&delta_stop_words)
|
||||
.difference();
|
||||
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
stop_words_builder.extend_stream(op)?;
|
||||
let stop_words_fst = stop_words_builder.into_set();
|
||||
|
||||
Ok(index.main.put_stop_words_fst(writer, &stop_words_fst)?)
|
||||
}
|
||||
|
||||
pub fn apply_synonyms_update(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
synonyms: BTreeMap<String, Vec<String>>,
|
||||
) -> MResult<()> {
|
||||
|
||||
let main_store = index.main;
|
||||
let synonyms_store = index.synonyms;
|
||||
let stop_words = index.main.stop_words_fst(writer)?.map_data(Cow::into_owned)?;
|
||||
let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
|
||||
|
||||
fn normalize<T: AsRef<[u8]>>(analyzer: &Analyzer<T>, text: &str) -> String {
|
||||
analyzer.analyze(&text)
|
||||
.tokens()
|
||||
.fold(String::new(), |s, t| s + t.text())
|
||||
}
|
||||
|
||||
// normalize synonyms and reorder them creating a BTreeMap
|
||||
let synonyms: BTreeMap<String, Vec<String>> = synonyms.into_iter().map( |(word, alternatives)| {
|
||||
let word = normalize(&analyzer, &word);
|
||||
let alternatives = alternatives.into_iter().map(|text| normalize(&analyzer, &text)).collect();
|
||||
|
||||
(word, alternatives)
|
||||
}).collect();
|
||||
|
||||
// index synonyms,
|
||||
// synyonyms have to be ordered by key before indexation
|
||||
let mut synonyms_builder = SetBuilder::memory();
|
||||
synonyms_store.clear(writer)?;
|
||||
for (word, alternatives) in synonyms {
|
||||
synonyms_builder.insert(&word)?;
|
||||
|
||||
let alternatives = {
|
||||
let alternatives = SetBuf::from_dirty(alternatives);
|
||||
let mut alternatives_builder = SetBuilder::memory();
|
||||
alternatives_builder.extend_iter(alternatives)?;
|
||||
alternatives_builder.into_set()
|
||||
};
|
||||
|
||||
synonyms_store.put_synonyms(writer, word.as_bytes(), &alternatives)?;
|
||||
}
|
||||
|
||||
let synonyms_set = synonyms_builder.into_set();
|
||||
|
||||
main_store.put_synonyms_fst(writer, &synonyms_set)?;
|
||||
|
||||
Ok(())
|
||||
}
|
@ -1,8 +0,0 @@
|
||||
[package]
|
||||
name = "meilisearch-error"
|
||||
version = "0.20.0"
|
||||
authors = ["marin <postma.marin@protonmail.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
actix-http = "2.2.0"
|
@ -1,185 +0,0 @@
|
||||
use std::fmt;
|
||||
|
||||
use actix_http::http::StatusCode;
|
||||
|
||||
/// An error that can be mapped to a `Code`.
///
/// Implementors only provide `error_code`; the HTTP status, documentation
/// url, name and type are all derived from that `Code` by default.
pub trait ErrorCode: std::error::Error {
    /// returns the `Code` describing this error
    fn error_code(&self) -> Code;

    /// returns the HTTP status code associated with the error
    fn http_status(&self) -> StatusCode {
        self.error_code().http()
    }

    /// returns the doc url associated with the error
    fn error_url(&self) -> String {
        self.error_code().url()
    }

    /// returns error name, used as error code
    fn error_name(&self) -> String {
        self.error_code().name()
    }

    /// return the error type
    fn error_type(&self) -> String {
        self.error_code().type_()
    }
}
|
||||
|
||||
/// Broad category an error belongs to.
#[allow(clippy::enum_variant_names)]
enum ErrorType {
    InternalError,
    InvalidRequestError,
    AuthenticationError,
}

impl fmt::Display for ErrorType {
    /// Writes the snake_case label of the category.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            ErrorType::InternalError => "internal_error",
            ErrorType::InvalidRequestError => "invalid_request_error",
            ErrorType::AuthenticationError => "authentication_error",
        };

        f.write_str(label)
    }
}
|
||||
|
||||
/// Every kind of error that can be reported, independently of where it
/// originates.
pub enum Code {
    // index related error
    CreateIndex,
    IndexAlreadyExists,
    IndexNotFound,
    InvalidIndexUid,
    OpenIndex,

    // invalid state error
    InvalidState,
    MissingPrimaryKey,
    PrimaryKeyAlreadyPresent,

    // invalid document
    MaxFieldsLimitExceeded,
    MissingDocumentId,

    // facet and filter errors
    Facet,
    Filter,

    // generic request, authentication and server errors
    BadParameter,
    BadRequest,
    DocumentNotFound,
    Internal,
    InvalidToken,
    MissingAuthorizationHeader,
    NotFound,
    PayloadTooLarge,
    RetrieveDocument,
    SearchDocuments,
    UnsupportedMediaType,

    // dump related errors
    DumpAlreadyInProgress,
    DumpProcessFailed,
}
|
||||
|
||||
impl Code {

    /// associate a `Code` variant to the actual ErrCode
    ///
    /// Each variant maps to an error name, an error type (internal,
    /// invalid request or authentication) and an HTTP status code.
    fn err_code(&self) -> ErrCode {
        use Code::*;

        match self {
            // index related errors
            // create index is thrown on internal error while creating an index.
            CreateIndex => ErrCode::internal("index_creation_failed", StatusCode::BAD_REQUEST),
            IndexAlreadyExists => ErrCode::invalid("index_already_exists", StatusCode::BAD_REQUEST),
            // thrown when requesting a nonexistent index
            IndexNotFound => ErrCode::invalid("index_not_found", StatusCode::NOT_FOUND),
            InvalidIndexUid => ErrCode::invalid("invalid_index_uid", StatusCode::BAD_REQUEST),
            OpenIndex => ErrCode::internal("index_not_accessible", StatusCode::INTERNAL_SERVER_ERROR),

            // invalid state error
            InvalidState => ErrCode::internal("invalid_state", StatusCode::INTERNAL_SERVER_ERROR),
            // thrown when no primary key has been set
            MissingPrimaryKey => ErrCode::invalid("missing_primary_key", StatusCode::BAD_REQUEST),
            // error thrown when trying to set an already existing primary key
            PrimaryKeyAlreadyPresent => ErrCode::invalid("primary_key_already_present", StatusCode::BAD_REQUEST),

            // invalid document
            MaxFieldsLimitExceeded => ErrCode::invalid("max_fields_limit_exceeded", StatusCode::BAD_REQUEST),
            MissingDocumentId => ErrCode::invalid("missing_document_id", StatusCode::BAD_REQUEST),

            // error related to facets
            Facet => ErrCode::invalid("invalid_facet", StatusCode::BAD_REQUEST),
            // error related to filters
            Filter => ErrCode::invalid("invalid_filter", StatusCode::BAD_REQUEST),

            BadParameter => ErrCode::invalid("bad_parameter", StatusCode::BAD_REQUEST),
            BadRequest => ErrCode::invalid("bad_request", StatusCode::BAD_REQUEST),
            DocumentNotFound => ErrCode::invalid("document_not_found", StatusCode::NOT_FOUND),
            Internal => ErrCode::internal("internal", StatusCode::INTERNAL_SERVER_ERROR),
            InvalidToken => ErrCode::authentication("invalid_token", StatusCode::FORBIDDEN),
            MissingAuthorizationHeader => ErrCode::authentication("missing_authorization_header", StatusCode::UNAUTHORIZED),
            NotFound => ErrCode::invalid("not_found", StatusCode::NOT_FOUND),
            PayloadTooLarge => ErrCode::invalid("payload_too_large", StatusCode::PAYLOAD_TOO_LARGE),
            RetrieveDocument => ErrCode::internal("unretrievable_document", StatusCode::BAD_REQUEST),
            SearchDocuments => ErrCode::internal("search_error", StatusCode::BAD_REQUEST),
            UnsupportedMediaType => ErrCode::invalid("unsupported_media_type", StatusCode::UNSUPPORTED_MEDIA_TYPE),

            // error related to dump
            DumpAlreadyInProgress => ErrCode::invalid("dump_already_in_progress", StatusCode::CONFLICT),
            DumpProcessFailed => ErrCode::internal("dump_process_failed", StatusCode::INTERNAL_SERVER_ERROR),
        }
    }

    /// return the HTTP status code associated with the `Code`
    fn http(&self) -> StatusCode {
        self.err_code().status_code
    }

    /// return error name, used as error code
    fn name(&self) -> String {
        self.err_code().error_name.to_string()
    }

    /// return the error type
    fn type_(&self) -> String {
        self.err_code().error_type.to_string()
    }

    /// return the doc url associated with the error
    ///
    /// The url is the errors documentation page with the error name as anchor.
    fn url(&self) -> String {
        format!("https://docs.meilisearch.com/errors#{}", self.name())
    }
}
|
||||
|
||||
/// Internal structure providing a convenient way to create error codes
struct ErrCode {
    /// HTTP status returned for the error
    status_code: StatusCode,
    /// category of the error
    error_type: ErrorType,
    /// name of the error, used as its code
    error_name: &'static str,
}
|
||||
|
||||
impl ErrCode {
|
||||
fn authentication(error_name: &'static str, status_code: StatusCode) -> ErrCode {
|
||||
ErrCode {
|
||||
status_code,
|
||||
error_name,
|
||||
error_type: ErrorType::AuthenticationError,
|
||||
}
|
||||
}
|
||||
|
||||
fn internal(error_name: &'static str, status_code: StatusCode) -> ErrCode {
|
||||
ErrCode {
|
||||
status_code,
|
||||
error_name,
|
||||
error_type: ErrorType::InternalError,
|
||||
}
|
||||
}
|
||||
|
||||
fn invalid(error_name: &'static str, status_code: StatusCode) -> ErrCode {
|
||||
ErrCode {
|
||||
status_code,
|
||||
error_name,
|
||||
error_type: ErrorType::InvalidRequestError,
|
||||
}
|
||||
}
|
||||
}
|
@ -1,86 +0,0 @@
|
||||
[package]
|
||||
name = "meilisearch-http"
|
||||
description = "MeiliSearch HTTP server"
|
||||
version = "0.20.0"
|
||||
license = "MIT"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
]
|
||||
edition = "2018"
|
||||
|
||||
[[bin]]
|
||||
name = "meilisearch"
|
||||
path = "src/main.rs"
|
||||
|
||||
[features]
|
||||
default = ["sentry"]
|
||||
|
||||
[dependencies]
|
||||
actix-cors = "0.5.4"
|
||||
actix-http = "2.2.0"
|
||||
actix-rt = "1.1.1"
|
||||
actix-service = "1.0.6"
|
||||
actix-web = { version = "3.3.2", features = ["rustls"] }
|
||||
bytes = "1.0.0"
|
||||
chrono = { version = "0.4.19", features = ["serde"] }
|
||||
crossbeam-channel = "0.5.0"
|
||||
env_logger = "0.8.2"
|
||||
flate2 = "1.0.19"
|
||||
futures = "0.3.8"
|
||||
http = "0.2.2"
|
||||
indexmap = { version = "1.6.1", features = ["serde-1"] }
|
||||
log = "0.4.11"
|
||||
main_error = "0.1.1"
|
||||
meilisearch-core = { path = "../meilisearch-core", version = "0.20.0" }
|
||||
meilisearch-error = { path = "../meilisearch-error", version = "0.20.0" }
|
||||
meilisearch-schema = { path = "../meilisearch-schema", version = "0.20.0" }
|
||||
mime = "0.3.16"
|
||||
once_cell = "1.5.2"
|
||||
rand = "0.8.1"
|
||||
regex = "1.4.2"
|
||||
rustls = "0.18.0"
|
||||
serde = { version = "1.0.118", features = ["derive"] }
|
||||
serde_json = { version = "1.0.61", features = ["preserve_order"] }
|
||||
serde_qs = "0.8.2"
|
||||
sha2 = "0.9.2"
|
||||
siphasher = "0.3.3"
|
||||
slice-group-by = "0.2.6"
|
||||
structopt = "0.3.21"
|
||||
tar = "0.4.30"
|
||||
tempfile = "3.1.0"
|
||||
tokio = { version = "0.2", features = ["macros"] }
|
||||
ureq = { version = "2.0.0", features = ["tls"], default-features = false }
|
||||
uuid = "0.8"
|
||||
walkdir = "2.3.1"
|
||||
whoami = "1.0.3"
|
||||
|
||||
[dependencies.sentry]
|
||||
version = "0.18.1"
|
||||
default-features = false
|
||||
features = [
|
||||
"with_client_implementation",
|
||||
"with_panic",
|
||||
"with_failure",
|
||||
"with_device_info",
|
||||
"with_rust_info",
|
||||
"with_reqwest_transport",
|
||||
"with_rustls",
|
||||
"with_env_logger"
|
||||
]
|
||||
optional = true
|
||||
|
||||
[dev-dependencies]
|
||||
serde_url_params = "0.2.0"
|
||||
tempdir = "0.3.7"
|
||||
tokio = { version = "0.2", features = ["macros", "time"] }
|
||||
|
||||
[dev-dependencies.assert-json-diff]
|
||||
git = "https://github.com/qdequele/assert-json-diff"
|
||||
branch = "master"
|
||||
|
||||
[build-dependencies]
|
||||
vergen = "3.1.0"
|
||||
|
||||
[target.'cfg(target_os = "linux")'.dependencies]
|
||||
jemallocator = "0.3.2"
|
@ -1,10 +0,0 @@
|
||||
use vergen::{generate_cargo_keys, ConstantsFlags};
|
||||
|
||||
fn main() {
|
||||
// Setup the flags, toggling off the 'SEMVER_FROM_CARGO_PKG' flag
|
||||
let mut flags = ConstantsFlags::all();
|
||||
flags.toggle(ConstantsFlags::SEMVER_FROM_CARGO_PKG);
|
||||
|
||||
// Generate the 'cargo:' key output
|
||||
generate_cargo_keys(ConstantsFlags::all()).expect("Unable to generate the cargo keys!");
|
||||
}
|
1
meilisearch-http/public/bulma.min.css
vendored
1
meilisearch-http/public/bulma.min.css
vendored
File diff suppressed because one or more lines are too long
@ -1,364 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="stylesheet" href="bulma.min.css">
|
||||
<title>MeiliSearch</title>
|
||||
<style>
|
||||
em {
|
||||
color: hsl(204, 86%, 25%);
|
||||
font-style: inherit;
|
||||
background-color: hsl(204, 86%, 88%);
|
||||
}
|
||||
|
||||
#results {
|
||||
max-width: 900px;
|
||||
margin: 20px auto 0 auto;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.notification {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.level-left {
|
||||
margin-right: 50px;
|
||||
}
|
||||
|
||||
.document {
|
||||
border-radius: 4px;
|
||||
margin-bottom: 20px;
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.document ol {
|
||||
flex: 0 0 75%;
|
||||
max-width: 75%;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
list-style-type: none;
|
||||
}
|
||||
|
||||
.document ol li {
|
||||
list-style: none;
|
||||
}
|
||||
|
||||
|
||||
.document .image {
|
||||
max-width: 50%;
|
||||
margin: 0 auto;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
@media screen and (min-width: 770px) {
|
||||
.document .image {
|
||||
max-width: 25%;
|
||||
flex: 0 0 25%;
|
||||
margin: 0;
|
||||
padding-left: 30px;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
}
|
||||
|
||||
.document .image img {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.attribute {
|
||||
text-align: center;
|
||||
box-sizing: border-box;
|
||||
text-transform: uppercase;
|
||||
font-weight: bold;
|
||||
color: rgba(0,0,0,.7);
|
||||
}
|
||||
|
||||
@media screen and (min-width: 770px) {
|
||||
.attribute {
|
||||
flex: 0 0 25%;
|
||||
max-width: 25%;
|
||||
text-align: right;
|
||||
padding-right: 10px;
|
||||
font-weight: normal;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
}
|
||||
@media screen and (max-width: 770px) {
|
||||
.attribute {
|
||||
padding-bottom: 0;
|
||||
}
|
||||
}
|
||||
|
||||
.content {
|
||||
flex: 0 0 75%;
|
||||
box-sizing: border-box;
|
||||
color: rgba(0,0,0,.9);
|
||||
overflow-wrap: anywhere;
|
||||
}
|
||||
|
||||
.hero-foot {
|
||||
padding-bottom: 3rem;
|
||||
}
|
||||
|
||||
@media screen and (max-width: 770px) {
|
||||
.align-on-mobile {
|
||||
text-align: center;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<section class="hero is-light">
|
||||
|
||||
<div class="hero-body">
|
||||
<div class="container">
|
||||
<div class="content is-medium align-on-mobile">
|
||||
<h1 class="title is-1 is-spaced">
|
||||
Welcome to MeiliSearch
|
||||
</h1>
|
||||
<p class="subtitle is-4">
|
||||
This dashboard will help you check the search results with ease.
|
||||
</p>
|
||||
</div>
|
||||
<div id="apiKeyContainer" class="columns">
|
||||
<input type="hidden" id="apiKey">
|
||||
</div>
|
||||
<div class="columns">
|
||||
<div class="column is-8">
|
||||
<label class="label" for="search">Search something</label>
|
||||
<div class="field has-addons">
|
||||
<div class="control">
|
||||
<span class="select">
|
||||
<select role="listbox" id="index" aria-label="Select the index you want to search on">
|
||||
<!-- indexes names -->
|
||||
</select>
|
||||
</span>
|
||||
</div>
|
||||
<div class="control is-expanded">
|
||||
<input id="search" class="input" type="search" autofocus placeholder="e.g. George Clooney" aria-label="Search through your documents">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="column is-4">
|
||||
<div class="columns">
|
||||
<div class="column is-6 has-text-centered">
|
||||
<p class="heading">Documents</p>
|
||||
<p id="count" class="title">0</p>
|
||||
</div>
|
||||
<div class="column is-6 has-text-centered">
|
||||
<p class="heading">Time Spent</p>
|
||||
<p id="time" class="title">N/A</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<div class="container">
|
||||
<ol id="results" class="content">
|
||||
<!-- documents matching requests -->
|
||||
</ol>
|
||||
</div>
|
||||
</section>
|
||||
</body>
|
||||
|
||||
<script>
|
||||
// Probes `${baseUrl}/version` with a synchronous, unauthenticated request.
// When the server answers 401, the hidden #apiKey input is removed and a
// visible, labelled password field with the same id is appended to
// #apiKeyContainer.
function setApiKeyField () {
  const probe = new XMLHttpRequest();
  probe.open("GET", `${baseUrl}/version`, false);

  probe.onload = function () {
    const container = document.getElementById('apiKeyContainer');
    if (probe.status !== 401) { return; }

    // Small local factory: element with CSS classes and already-built children.
    const build = function (tag, classes, children) {
      const node = document.createElement(tag);
      node.classList.add(...classes);
      for (const child of children) {
        node.appendChild(child);
      }
      return node;
    };

    document.getElementById('apiKey').remove();

    const keyInput = build('input', ['input', 'is-small'], []);
    keyInput.setAttribute('id', 'apiKey');
    keyInput.setAttribute('type', 'password');
    keyInput.setAttribute('placeholder', 'Enter your API key');

    const control = build('div', ['control'], [keyInput]);

    const label = build('label', ['label'], [document.createTextNode('API Key')]);
    label.setAttribute('for', 'apiKey');

    const field = build('div', ['field'], [label, control]);
    const column = build('div', ['column', 'is-4'], [field]);

    container.appendChild(column);
  };

  probe.send(null);
}
|
||||
|
||||
// Escapes angle brackets in `str` so it can be assigned to `innerHTML`
// without being interpreted as markup, then re-enables the `<em>` / `</em>`
// highlight tags only.
//
// Non-string and empty values are returned unchanged.
function sanitizeHTMLEntities(str) {
  if (str && typeof str === 'string') {
    // Neutralise every tag first...
    str = str.replace(/</g, "&lt;");
    str = str.replace(/>/g, "&gt;");
    // ...then restore the highlight markers.
    str = str.replace(/&lt;em&gt;/g, "<em>");
    str = str.replace(/&lt;\/em&gt;/g, "</em>");
  }
  return str;
}
|
||||
|
||||
// Performs a blocking GET on `theUrl` and returns the raw response text.
// When `apiKey` is truthy it is sent in the `x-Meili-API-Key` header.
function httpGet(theUrl, apiKey) {
  const request = new XMLHttpRequest();
  const asynchronous = false; // synchronous request
  request.open("GET", theUrl, asynchronous);

  if (apiKey) {
    request.setRequestHeader("x-Meili-API-Key", apiKey);
  }

  request.send(null);
  return request.responseText;
}
|
||||
|
||||
// Reloads the <select id="index"> options from `${baseUrl}/indexes`, using the
// API key stored in localStorage (if any).
//
// The list is left untouched when the request fails, the response is not
// valid JSON, or the parsed payload is not an array (e.g. an error object).
function refreshIndexList() {
  // TODO we must not block here
  let result;
  try {
    result = JSON.parse(httpGet(`${baseUrl}/indexes`, localStorage.getItem('apiKey')));
  } catch (err) {
    // A failed request or empty body must not abort the rest of the page setup.
    console.error(err);
    return;
  }

  if (!Array.isArray(result)) { return }

  const select = document.getElementById("index");
  select.innerHTML = '';

  // `const` keeps the loop variable from leaking into the global scope.
  for (const index of result) {
    const option = document.createElement('option');
    option.value = index.uid;
    // Index names come from the server: insert them as text, never as markup.
    option.textContent = index.name;
    select.appendChild(option);
  }
}
|
||||
|
||||
let lastRequest = undefined;
|
||||
|
||||
// Runs a search on the selected index with the current content of the
// #search input and renders the hits into #results, updating the #time and
// #count counters. Any request still in flight is aborted first.
//
// `search`, `results`, `time` and `count` are the page elements with those
// ids, reached through the browser's named-element globals as before.
function triggerSearch() {
  var e = document.getElementById("index");
  if (e.selectedIndex == -1) { return }
  var index = e.options[e.selectedIndex].value;

  let theUrl = `${baseUrl}/indexes/${index}/search?q=${encodeURIComponent(search.value)}&attributesToHighlight=*`;

  if (lastRequest) { lastRequest.abort() }

  // Keep a local handle: the handler below must read *this* request, not
  // whatever `lastRequest` points at by the time the response arrives.
  const request = new XMLHttpRequest();
  lastRequest = request;

  request.open("GET", theUrl, true);

  if (localStorage.getItem('apiKey')) {
    request.setRequestHeader("x-Meili-API-Key", localStorage.getItem('apiKey'));
  }

  request.onload = function (e) {
    if (request.readyState === 4 && request.status === 200) {
      // The whole JSON text is escaped before parsing, so every string in
      // the parsed hits is already safe to assign to innerHTML.
      let sanitizedResponseText = sanitizeHTMLEntities(request.responseText);
      let httpResults = JSON.parse(sanitizedResponseText);
      results.innerHTML = '';

      let processingTimeMs = httpResults.processingTimeMs;
      let numberOfDocuments = httpResults.nbHits;
      time.innerHTML = `${processingTimeMs}ms`;
      count.innerHTML = `${numberOfDocuments}`;

      // `const` keeps the loop variable from leaking into the global scope.
      for (const result of httpResults.hits) {
        // Highlighted values from `_formatted` override the raw ones.
        const element = {...result, ...result._formatted };
        delete element._formatted;

        const elem = document.createElement('li');
        elem.classList.add("document","box");

        const div = document.createElement('div');
        div.classList.add("columns","is-desktop","is-tablet");
        const info = document.createElement('div');
        info.classList.add("column","align-on-mobile");
        let image = undefined;

        for (const prop in element) {
          // Check if property is an image url link.
          if (typeof result[prop] === 'string') {
            if (image == undefined && result[prop].match(/^(https|http):\/\/.*(jpe?g|png|gif)(\?.*)?$/g)) {
              image = result[prop];
            }
          }

          const field = document.createElement('div');
          field.classList.add("columns");

          const attribute = document.createElement('div');
          attribute.classList.add("attribute", "column");
          attribute.innerHTML = prop;

          const content = document.createElement('div');
          content.classList.add("content", "column");

          if (typeof (element[prop]) === "object") {
            content.innerHTML = JSON.stringify(element[prop]);
          } else {
            content.innerHTML = element[prop];
          }

          field.appendChild(attribute);
          field.appendChild(content);

          info.appendChild(field);
        }

        div.appendChild(info);
        elem.appendChild(div);

        if (image != undefined) {
          const divImage = document.createElement('div');
          divImage.classList.add("image","column","align-on-mobile");

          const img = document.createElement('img');
          img.src = image;
          img.setAttribute("alt","Item illustration");

          divImage.appendChild(img);
          // `div` is already attached to `elem`; adding the image column is enough.
          div.appendChild(divImage);
        }

        results.appendChild(elem)
      }
    } else {
      console.error(request.statusText);
    }
  };
  request.send(null);
}
|
||||
|
||||
// Page initialisation.
//
// `setApiKeyField()` may remove the hidden #apiKey input and insert a visible
// password field with the same id. It therefore has to run BEFORE the stored
// key is restored and the `input` listener is attached; otherwise both end up
// on the discarded hidden element and typing a key in the visible field does
// nothing.
let baseUrl = window.location.origin;
setApiKeyField();

// Resolve the field only now, so we get whichever element is actually in the page.
const apiKeyInput = document.getElementById('apiKey');

if (!apiKeyInput.value) {
  apiKeyInput.value = localStorage.getItem('apiKey');
}

apiKeyInput.addEventListener('input', function(e) {
  localStorage.setItem('apiKey', apiKeyInput.value);
  refreshIndexList();
}, false);

refreshIndexList();

search.oninput = triggerSearch;

let select = document.getElementById("index");
select.onchange = triggerSearch;

triggerSearch();
|
||||
</script>
|
||||
</html>
|
@ -1,142 +0,0 @@
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::{error, thread};
|
||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use log::error;
|
||||
use serde::Serialize;
|
||||
use serde_qs as qs;
|
||||
use siphasher::sip::SipHasher;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::Data;
|
||||
use crate::Opt;
|
||||
|
||||
const AMPLITUDE_API_KEY: &str = "f7fba398780e06d8fe6666a9be7e3d47";
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct EventProperties {
|
||||
database_size: u64,
|
||||
last_update_timestamp: Option<i64>, //timestamp
|
||||
number_of_documents: Vec<u64>,
|
||||
}
|
||||
|
||||
impl EventProperties {
|
||||
fn from(data: Data) -> Result<EventProperties, Box<dyn error::Error>> {
|
||||
let mut index_list = Vec::new();
|
||||
|
||||
let reader = data.db.main_read_txn()?;
|
||||
|
||||
for index_uid in data.db.indexes_uids() {
|
||||
if let Some(index) = data.db.open_index(&index_uid) {
|
||||
let number_of_documents = index.main.number_of_documents(&reader)?;
|
||||
index_list.push(number_of_documents);
|
||||
}
|
||||
}
|
||||
|
||||
let database_size = WalkDir::new(&data.db_path)
|
||||
.into_iter()
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter_map(|entry| entry.metadata().ok())
|
||||
.filter(|metadata| metadata.is_file())
|
||||
.fold(0, |acc, m| acc + m.len());
|
||||
|
||||
let last_update_timestamp = data.db.last_update(&reader)?.map(|u| u.timestamp());
|
||||
|
||||
Ok(EventProperties {
|
||||
database_size,
|
||||
last_update_timestamp,
|
||||
number_of_documents: index_list,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-installation properties serialized into every analytics `Event`.
#[derive(Debug, Serialize)]
|
||||
struct UserProperties<'a> {
|
||||
// Filled from `opt.env` by `analytics_sender`.
env: &'a str,
|
||||
// Whole days elapsed since `analytics_sender` started.
start_since_days: u64,
|
||||
// Value of the `MEILI_USER_EMAIL` environment variable, if set.
user_email: Option<String>,
|
||||
// Value of the `MEILI_SERVER_PROVIDER` environment variable, if set.
server_provider: Option<String>,
|
||||
}
|
||||
|
||||
/// One analytics event; `analytics_sender` serializes it to JSON and embeds
/// the result in an `AmplitudeRequest`.
#[derive(Debug, Serialize)]
|
||||
struct Event<'a> {
|
||||
// Hex hash of username + hostname + platform (computed in `analytics_sender`).
user_id: &'a str,
|
||||
event_type: &'a str,
|
||||
// Platform name as reported by `whoami::platform()`.
device_id: &'a str,
|
||||
// Seconds since the Unix epoch at the time the event is built.
time: u64,
|
||||
app_version: &'a str,
|
||||
user_properties: UserProperties<'a>,
|
||||
// `None` when `EventProperties::from` fails.
event_properties: Option<EventProperties>,
|
||||
}
|
||||
|
||||
/// Form body posted to Amplitude; encoded with `serde_qs` by `analytics_sender`.
#[derive(Debug, Serialize)]
|
||||
struct AmplitudeRequest<'a> {
|
||||
api_key: &'a str,
|
||||
// JSON-serialized `Event`.
event: &'a str,
|
||||
}
|
||||
|
||||
pub fn analytics_sender(data: Data, opt: Opt) {
|
||||
let username = whoami::username();
|
||||
let hostname = whoami::hostname();
|
||||
let platform = whoami::platform();
|
||||
|
||||
let uid = username + &hostname + &platform.to_string();
|
||||
|
||||
let mut hasher = SipHasher::new();
|
||||
uid.hash(&mut hasher);
|
||||
let hash = hasher.finish();
|
||||
|
||||
let uid = format!("{:X}", hash);
|
||||
let platform = platform.to_string();
|
||||
let first_start = Instant::now();
|
||||
|
||||
loop {
|
||||
let n = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
|
||||
let user_id = &uid;
|
||||
let device_id = &platform;
|
||||
let time = n.as_secs();
|
||||
let event_type = "runtime_tick";
|
||||
let elapsed_since_start = first_start.elapsed().as_secs() / 86_400; // One day
|
||||
let event_properties = EventProperties::from(data.clone()).ok();
|
||||
let app_version = env!("CARGO_PKG_VERSION").to_string();
|
||||
let app_version = app_version.as_str();
|
||||
let user_email = std::env::var("MEILI_USER_EMAIL").ok();
|
||||
let server_provider = std::env::var("MEILI_SERVER_PROVIDER").ok();
|
||||
let user_properties = UserProperties {
|
||||
env: &opt.env,
|
||||
start_since_days: elapsed_since_start,
|
||||
user_email,
|
||||
server_provider,
|
||||
};
|
||||
|
||||
let event = Event {
|
||||
user_id,
|
||||
event_type,
|
||||
device_id,
|
||||
time,
|
||||
app_version,
|
||||
user_properties,
|
||||
event_properties
|
||||
};
|
||||
let event = serde_json::to_string(&event).unwrap();
|
||||
|
||||
let request = AmplitudeRequest {
|
||||
api_key: AMPLITUDE_API_KEY,
|
||||
event: &event,
|
||||
};
|
||||
|
||||
let body = qs::to_string(&request).unwrap();
|
||||
let response = ureq::post("https://api.amplitude.com/httpapi").send_string(&body);
|
||||
match response {
|
||||
Err(ureq::Error::Status(_ , response)) => {
|
||||
error!("Unsuccessful call to Amplitude: {}", response.into_string().unwrap_or_default());
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Unsuccessful call to Amplitude: {}", e);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
thread::sleep(Duration::from_secs(3600)) // one hour
|
||||
}
|
||||
}
|
@ -1,175 +0,0 @@
|
||||
use std::error::Error;
|
||||
use std::ops::Deref;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use meilisearch_core::{Database, DatabaseOptions, Index};
|
||||
use sha2::Digest;
|
||||
|
||||
use crate::error::{Error as MSError, ResponseError};
|
||||
use crate::index_update_callback;
|
||||
use crate::option::Opt;
|
||||
use crate::dump::DumpInfo;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Data {
|
||||
inner: Arc<DataInner>,
|
||||
}
|
||||
|
||||
impl Deref for Data {
|
||||
type Target = DataInner;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.inner
|
||||
}
|
||||
}
|
||||
|
||||
/// Server state reachable through `Data` (via `Deref`); populated once in `Data::new`.
#[derive(Clone)]
|
||||
pub struct DataInner {
|
||||
pub db: Arc<Database>,
|
||||
// Copy of `opt.db_path`, the location the database was opened from.
pub db_path: String,
|
||||
pub dumps_dir: PathBuf,
|
||||
pub dump_batch_size: usize,
|
||||
pub api_keys: ApiKeys,
|
||||
// Process id captured with `std::process::id()` when `Data::new` runs.
pub server_pid: u32,
|
||||
pub http_payload_size_limit: usize,
|
||||
// Starts as `None`; read and written through `Data::get_current_dump_info`
// and `Data::set_current_dump_info`.
pub current_dump: Arc<Mutex<Option<DumpInfo>>>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ApiKeys {
|
||||
pub public: Option<String>,
|
||||
pub private: Option<String>,
|
||||
pub master: Option<String>,
|
||||
}
|
||||
|
||||
impl ApiKeys {
|
||||
pub fn generate_missing_api_keys(&mut self) {
|
||||
if let Some(master_key) = &self.master {
|
||||
if self.private.is_none() {
|
||||
let key = format!("{}-private", master_key);
|
||||
let sha = sha2::Sha256::digest(key.as_bytes());
|
||||
self.private = Some(format!("{:x}", sha));
|
||||
}
|
||||
if self.public.is_none() {
|
||||
let key = format!("{}-public", master_key);
|
||||
let sha = sha2::Sha256::digest(key.as_bytes());
|
||||
self.public = Some(format!("{:x}", sha));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Data {
|
||||
pub fn new(opt: Opt) -> Result<Data, Box<dyn Error>> {
|
||||
let db_path = opt.db_path.clone();
|
||||
let dumps_dir = opt.dumps_dir.clone();
|
||||
let dump_batch_size = opt.dump_batch_size;
|
||||
let server_pid = std::process::id();
|
||||
|
||||
let db_opt = DatabaseOptions {
|
||||
main_map_size: opt.max_mdb_size,
|
||||
update_map_size: opt.max_udb_size,
|
||||
};
|
||||
|
||||
let http_payload_size_limit = opt.http_payload_size_limit;
|
||||
|
||||
let db = Arc::new(Database::open_or_create(opt.db_path, db_opt)?);
|
||||
|
||||
let mut api_keys = ApiKeys {
|
||||
master: opt.master_key,
|
||||
private: None,
|
||||
public: None,
|
||||
};
|
||||
|
||||
api_keys.generate_missing_api_keys();
|
||||
|
||||
let current_dump = Arc::new(Mutex::new(None));
|
||||
|
||||
let inner_data = DataInner {
|
||||
db: db.clone(),
|
||||
db_path,
|
||||
dumps_dir,
|
||||
dump_batch_size,
|
||||
api_keys,
|
||||
server_pid,
|
||||
http_payload_size_limit,
|
||||
current_dump,
|
||||
};
|
||||
|
||||
let data = Data {
|
||||
inner: Arc::new(inner_data),
|
||||
};
|
||||
|
||||
let callback_context = data.clone();
|
||||
db.set_update_callback(Box::new(move |index_uid, status| {
|
||||
index_update_callback(&index_uid, &callback_context, status);
|
||||
}));
|
||||
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
fn create_index(&self, uid: &str) -> Result<Index, ResponseError> {
|
||||
if !uid
|
||||
.chars()
|
||||
.all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
|
||||
{
|
||||
return Err(MSError::InvalidIndexUid.into());
|
||||
}
|
||||
|
||||
let created_index = self.db.create_index(&uid).map_err(|e| match e {
|
||||
meilisearch_core::Error::IndexAlreadyExists => e.into(),
|
||||
_ => ResponseError::from(MSError::create_index(e)),
|
||||
})?;
|
||||
|
||||
self.db.main_write::<_, _, ResponseError>(|mut writer| {
|
||||
created_index.main.put_name(&mut writer, uid)?;
|
||||
|
||||
created_index
|
||||
.main
|
||||
.created_at(&writer)?
|
||||
.ok_or(MSError::internal("Impossible to read created at"))?;
|
||||
|
||||
created_index
|
||||
.main
|
||||
.updated_at(&writer)?
|
||||
.ok_or(MSError::internal("Impossible to read updated at"))?;
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
Ok(created_index)
|
||||
}
|
||||
|
||||
pub fn get_current_dump_info(&self) -> Option<DumpInfo> {
|
||||
self.current_dump.lock().unwrap().clone()
|
||||
}
|
||||
|
||||
pub fn set_current_dump_info(&self, dump_info: DumpInfo) {
|
||||
self.current_dump.lock().unwrap().replace(dump_info);
|
||||
}
|
||||
|
||||
pub fn get_or_create_index<F, R>(&self, uid: &str, f: F) -> Result<R, ResponseError>
|
||||
where
|
||||
F: FnOnce(&Index) -> Result<R, ResponseError>,
|
||||
{
|
||||
let mut index_has_been_created = false;
|
||||
|
||||
let index = match self.db.open_index(&uid) {
|
||||
Some(index) => index,
|
||||
None => {
|
||||
index_has_been_created = true;
|
||||
self.create_index(&uid)?
|
||||
}
|
||||
};
|
||||
|
||||
match f(&index) {
|
||||
Ok(r) => Ok(r),
|
||||
Err(err) => {
|
||||
if index_has_been_created {
|
||||
let _ = self.db.delete_index(&uid);
|
||||
}
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,412 +0,0 @@
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::prelude::*;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::thread;
|
||||
|
||||
use actix_web::web;
|
||||
use chrono::offset::Utc;
|
||||
use indexmap::IndexMap;
|
||||
use log::{error, info};
|
||||
use meilisearch_core::{MainWriter, MainReader, UpdateReader};
|
||||
use meilisearch_core::settings::Settings;
|
||||
use meilisearch_core::update::{apply_settings_update, apply_documents_addition};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::Data;
|
||||
use crate::error::{Error, ResponseError};
|
||||
use crate::helpers::compression;
|
||||
use crate::routes::index;
|
||||
use crate::routes::index::IndexResponse;
|
||||
|
||||
/// Version tag stored in a dump's metadata; `import_dump` matches on it to
/// pick the import routine.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
|
||||
enum DumpVersion {
|
||||
V1,
|
||||
}
|
||||
|
||||
impl DumpVersion {
|
||||
// Version written by `DumpMetadata::new` into newly created metadata.
const CURRENT: Self = Self::V1;
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct DumpMetadata {
|
||||
indexes: Vec<crate::routes::index::IndexResponse>,
|
||||
db_version: String,
|
||||
dump_version: DumpVersion,
|
||||
}
|
||||
|
||||
impl DumpMetadata {
|
||||
/// Create a DumpMetadata with the current dump version of meilisearch.
|
||||
pub fn new(indexes: Vec<crate::routes::index::IndexResponse>, db_version: String) -> Self {
|
||||
DumpMetadata {
|
||||
indexes,
|
||||
db_version,
|
||||
dump_version: DumpVersion::CURRENT,
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract DumpMetadata from `metadata.json` file present at provided `dir_path`
|
||||
fn from_path(dir_path: &Path) -> Result<Self, Error> {
|
||||
let path = dir_path.join("metadata.json");
|
||||
let file = File::open(path)?;
|
||||
let reader = std::io::BufReader::new(file);
|
||||
let metadata = serde_json::from_reader(reader)?;
|
||||
|
||||
Ok(metadata)
|
||||
}
|
||||
|
||||
/// Write DumpMetadata in `metadata.json` file at provided `dir_path`
|
||||
fn to_path(&self, dir_path: &Path) -> Result<(), Error> {
|
||||
let path = dir_path.join("metadata.json");
|
||||
let file = File::create(path)?;
|
||||
|
||||
serde_json::to_writer(file, &self)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract Settings from `settings.json` file present at provided `dir_path`
|
||||
fn settings_from_path(dir_path: &Path) -> Result<Settings, Error> {
|
||||
let path = dir_path.join("settings.json");
|
||||
let file = File::open(path)?;
|
||||
let reader = std::io::BufReader::new(file);
|
||||
let metadata = serde_json::from_reader(reader)?;
|
||||
|
||||
Ok(metadata)
|
||||
}
|
||||
|
||||
/// Write Settings in `settings.json` file at provided `dir_path`
|
||||
fn settings_to_path(settings: &Settings, dir_path: &Path) -> Result<(), Error> {
|
||||
let path = dir_path.join("settings.json");
|
||||
let file = File::create(path)?;
|
||||
|
||||
serde_json::to_writer(file, settings)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Import settings and documents of a dump with version `DumpVersion::V1` in specified index.
|
||||
fn import_index_v1(
|
||||
data: &Data,
|
||||
dumps_dir: &Path,
|
||||
index_uid: &str,
|
||||
document_batch_size: usize,
|
||||
write_txn: &mut MainWriter,
|
||||
) -> Result<(), Error> {
|
||||
|
||||
// open index
|
||||
let index = data
|
||||
.db
|
||||
.open_index(index_uid)
|
||||
.ok_or(Error::index_not_found(index_uid))?;
|
||||
|
||||
// index dir path in dump dir
|
||||
let index_path = &dumps_dir.join(index_uid);
|
||||
|
||||
// extract `settings.json` file and import content
|
||||
let settings = settings_from_path(&index_path)?;
|
||||
let settings = settings.to_update().map_err(|e| Error::dump_failed(format!("importing settings for index {}; {}", index_uid, e)))?;
|
||||
apply_settings_update(write_txn, &index, settings)?;
|
||||
|
||||
// create iterator over documents in `documents.jsonl` to make batch importation
|
||||
// create iterator over documents in `documents.jsonl` to make batch importation
|
||||
let documents = {
|
||||
let file = File::open(&index_path.join("documents.jsonl"))?;
|
||||
let reader = std::io::BufReader::new(file);
|
||||
let deserializer = serde_json::Deserializer::from_reader(reader);
|
||||
deserializer.into_iter::<IndexMap<String, serde_json::Value>>()
|
||||
};
|
||||
|
||||
// batch import document every `document_batch_size`:
|
||||
// create a Vec to bufferize documents
|
||||
let mut values = Vec::with_capacity(document_batch_size);
|
||||
// iterate over documents
|
||||
for document in documents {
|
||||
// push document in buffer
|
||||
values.push(document?);
|
||||
// if buffer is full, create and apply a batch, and clean buffer
|
||||
if values.len() == document_batch_size {
|
||||
let batch = std::mem::replace(&mut values, Vec::with_capacity(document_batch_size));
|
||||
apply_documents_addition(write_txn, &index, batch, None)?;
|
||||
}
|
||||
}
|
||||
|
||||
// apply documents remaining in the buffer
|
||||
if !values.is_empty() {
|
||||
apply_documents_addition(write_txn, &index, values, None)?;
|
||||
}
|
||||
|
||||
// sync index information: stats, updated_at, last_update
|
||||
if let Err(e) = crate::index_update_callback_txn(index, index_uid, data, write_txn) {
|
||||
return Err(Error::Internal(e));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Import dump from `dump_path` in database.
|
||||
pub fn import_dump(
|
||||
data: &Data,
|
||||
dump_path: &Path,
|
||||
document_batch_size: usize,
|
||||
) -> Result<(), Error> {
|
||||
info!("Importing dump from {:?}...", dump_path);
|
||||
|
||||
// create a temporary directory
|
||||
let tmp_dir = TempDir::new()?;
|
||||
let tmp_dir_path = tmp_dir.path();
|
||||
|
||||
// extract dump in temporary directory
|
||||
compression::from_tar_gz(dump_path, tmp_dir_path)?;
|
||||
|
||||
// read dump metadata
|
||||
let metadata = DumpMetadata::from_path(&tmp_dir_path)?;
|
||||
|
||||
// choose importation function from DumpVersion of metadata
|
||||
let import_index = match metadata.dump_version {
|
||||
DumpVersion::V1 => import_index_v1,
|
||||
};
|
||||
|
||||
// remove indexes which have same `uid` than indexes to import and create empty indexes
|
||||
let existing_index_uids = data.db.indexes_uids();
|
||||
for index in metadata.indexes.iter() {
|
||||
if existing_index_uids.contains(&index.uid) {
|
||||
data.db.delete_index(index.uid.clone())?;
|
||||
}
|
||||
index::create_index_sync(&data.db, index.uid.clone(), index.name.clone(), index.primary_key.clone())?;
|
||||
}
|
||||
|
||||
// import each indexes content
|
||||
data.db.main_write::<_, _, Error>(|mut writer| {
|
||||
for index in metadata.indexes {
|
||||
import_index(&data, tmp_dir_path, &index.uid, document_batch_size, &mut writer)?;
|
||||
}
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
info!("Dump importation from {:?} succeed", dump_path);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// State of a dump; serialized in snake_case (`done`, `in_progress`, `failed`).
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum DumpStatus {
|
||||
Done,
|
||||
InProgress,
|
||||
// Set by `DumpInfo::with_error`.
Failed,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct DumpInfo {
|
||||
pub uid: String,
|
||||
pub status: DumpStatus,
|
||||
#[serde(skip_serializing_if = "Option::is_none", flatten)]
|
||||
pub error: Option<serde_json::Value>,
|
||||
|
||||
}
|
||||
|
||||
impl DumpInfo {
|
||||
pub fn new(uid: String, status: DumpStatus) -> Self {
|
||||
Self { uid, status, error: None }
|
||||
}
|
||||
|
||||
pub fn with_error(mut self, error: ResponseError) -> Self {
|
||||
self.status = DumpStatus::Failed;
|
||||
self.error = Some(json!(error));
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
pub fn dump_already_in_progress(&self) -> bool {
|
||||
self.status == DumpStatus::InProgress
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate uid from creation date
|
||||
fn generate_uid() -> String {
|
||||
Utc::now().format("%Y%m%d-%H%M%S%3f").to_string()
|
||||
}
|
||||
|
||||
/// Returns the path of the compressed dump `dump_uid`, i.e.
/// `<dumps_dir>/<dump_uid>.dump`.
pub fn compressed_dumps_dir(dumps_dir: &Path, dump_uid: &str) -> PathBuf {
    let file_name = format!("{}.dump", dump_uid);
    let mut path = dumps_dir.to_path_buf();
    path.push(file_name);
    path
}
|
||||
|
||||
/// Write metadata in dump
|
||||
fn dump_metadata(data: &web::Data<Data>, dir_path: &Path, indexes: Vec<IndexResponse>) -> Result<(), Error> {
|
||||
let (db_major, db_minor, db_patch) = data.db.version();
|
||||
let metadata = DumpMetadata::new(indexes, format!("{}.{}.{}", db_major, db_minor, db_patch));
|
||||
|
||||
metadata.to_path(dir_path)
|
||||
}
|
||||
|
||||
/// Export settings of provided index in dump
|
||||
fn dump_index_settings(data: &web::Data<Data>, reader: &MainReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> {
|
||||
let settings = crate::routes::setting::get_all_sync(data, reader, index_uid)?;
|
||||
|
||||
settings_to_path(&settings, dir_path)
|
||||
}
|
||||
|
||||
/// Export updates of provided index in dump
|
||||
fn dump_index_updates(data: &web::Data<Data>, reader: &UpdateReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> {
|
||||
let updates_path = dir_path.join("updates.jsonl");
|
||||
let updates = crate::routes::index::get_all_updates_status_sync(data, reader, index_uid)?;
|
||||
|
||||
let file = File::create(updates_path)?;
|
||||
|
||||
for update in updates {
|
||||
serde_json::to_writer(&file, &update)?;
|
||||
writeln!(&file)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Export documents of provided index in dump
|
||||
fn dump_index_documents(data: &web::Data<Data>, reader: &MainReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> {
|
||||
let documents_path = dir_path.join("documents.jsonl");
|
||||
let file = File::create(documents_path)?;
|
||||
let dump_batch_size = data.dump_batch_size;
|
||||
|
||||
let mut offset = 0;
|
||||
loop {
|
||||
let documents = crate::routes::document::get_all_documents_sync(data, reader, index_uid, offset, dump_batch_size, None)?;
|
||||
if documents.is_empty() { break; } else { offset += dump_batch_size; }
|
||||
|
||||
for document in documents {
|
||||
serde_json::to_writer(&file, &document)?;
|
||||
writeln!(&file)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write error with a context.
|
||||
fn fail_dump_process<E: std::error::Error>(data: &web::Data<Data>, dump_info: DumpInfo, context: &str, error: E) {
|
||||
let error_message = format!("{}; {}", context, error);
|
||||
error!("Something went wrong during dump process: {}", &error_message);
|
||||
data.set_current_dump_info(dump_info.with_error(Error::dump_failed(error_message).into()))
|
||||
}
|
||||
|
||||
/// Main function of dump.
|
||||
fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo) {
|
||||
// open read transaction on Update
|
||||
let update_reader = match data.db.update_read_txn() {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
fail_dump_process(&data, dump_info, "creating RO transaction on updates", e);
|
||||
return ;
|
||||
}
|
||||
};
|
||||
|
||||
// open read transaction on Main
|
||||
let main_reader = match data.db.main_read_txn() {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
fail_dump_process(&data, dump_info, "creating RO transaction on main", e);
|
||||
return ;
|
||||
}
|
||||
};
|
||||
|
||||
// create a temporary directory
|
||||
let tmp_dir = match TempDir::new() {
|
||||
Ok(tmp_dir) => tmp_dir,
|
||||
Err(e) => {
|
||||
fail_dump_process(&data, dump_info, "creating temporary directory", e);
|
||||
return ;
|
||||
}
|
||||
};
|
||||
let tmp_dir_path = tmp_dir.path();
|
||||
|
||||
// fetch indexes
|
||||
let indexes = match crate::routes::index::list_indexes_sync(&data, &main_reader) {
|
||||
Ok(indexes) => indexes,
|
||||
Err(e) => {
|
||||
fail_dump_process(&data, dump_info, "listing indexes", e);
|
||||
return ;
|
||||
}
|
||||
};
|
||||
|
||||
// create metadata
|
||||
if let Err(e) = dump_metadata(&data, &tmp_dir_path, indexes.clone()) {
|
||||
fail_dump_process(&data, dump_info, "generating metadata", e);
|
||||
return ;
|
||||
}
|
||||
|
||||
// export settings, updates and documents for each indexes
|
||||
for index in indexes {
|
||||
let index_path = tmp_dir_path.join(&index.uid);
|
||||
|
||||
// create index sub-dircetory
|
||||
if let Err(e) = create_dir_all(&index_path) {
|
||||
fail_dump_process(&data, dump_info, &format!("creating directory for index {}", &index.uid), e);
|
||||
return ;
|
||||
}
|
||||
|
||||
// export settings
|
||||
if let Err(e) = dump_index_settings(&data, &main_reader, &index_path, &index.uid) {
|
||||
fail_dump_process(&data, dump_info, &format!("generating settings for index {}", &index.uid), e);
|
||||
return ;
|
||||
}
|
||||
|
||||
// export documents
|
||||
if let Err(e) = dump_index_documents(&data, &main_reader, &index_path, &index.uid) {
|
||||
fail_dump_process(&data, dump_info, &format!("generating documents for index {}", &index.uid), e);
|
||||
return ;
|
||||
}
|
||||
|
||||
// export updates
|
||||
if let Err(e) = dump_index_updates(&data, &update_reader, &index_path, &index.uid) {
|
||||
fail_dump_process(&data, dump_info, &format!("generating updates for index {}", &index.uid), e);
|
||||
return ;
|
||||
}
|
||||
}
|
||||
|
||||
// compress dump in a file named `{dump_uid}.dump` in `dumps_dir`
|
||||
if let Err(e) = crate::helpers::compression::to_tar_gz(&tmp_dir_path, &compressed_dumps_dir(&dumps_dir, &dump_info.uid)) {
|
||||
fail_dump_process(&data, dump_info, "compressing dump", e);
|
||||
return ;
|
||||
}
|
||||
|
||||
// update dump info to `done`
|
||||
let resume = DumpInfo::new(
|
||||
dump_info.uid,
|
||||
DumpStatus::Done
|
||||
);
|
||||
|
||||
data.set_current_dump_info(resume);
|
||||
}
|
||||
|
||||
pub fn init_dump_process(data: &web::Data<Data>, dumps_dir: &Path) -> Result<DumpInfo, Error> {
|
||||
create_dir_all(dumps_dir).map_err(|e| Error::dump_failed(format!("creating temporary directory {}", e)))?;
|
||||
|
||||
// check if a dump is already in progress
|
||||
if let Some(resume) = data.get_current_dump_info() {
|
||||
if resume.dump_already_in_progress() {
|
||||
return Err(Error::dump_conflict())
|
||||
}
|
||||
}
|
||||
|
||||
// generate a new dump info
|
||||
let info = DumpInfo::new(
|
||||
generate_uid(),
|
||||
DumpStatus::InProgress
|
||||
);
|
||||
|
||||
data.set_current_dump_info(info.clone());
|
||||
|
||||
let data = data.clone();
|
||||
let dumps_dir = dumps_dir.to_path_buf();
|
||||
let info_cloned = info.clone();
|
||||
// run dump process in a new thread
|
||||
thread::spawn(move ||
|
||||
dump_process(data, dumps_dir, info_cloned)
|
||||
);
|
||||
|
||||
Ok(info)
|
||||
}
|
@ -1,307 +0,0 @@
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
|
||||
use actix_http::ResponseBuilder;
|
||||
use actix_web as aweb;
|
||||
use actix_web::error::{JsonPayloadError, QueryPayloadError};
|
||||
use actix_web::http::StatusCode;
|
||||
use serde::ser::{Serialize, Serializer, SerializeStruct};
|
||||
|
||||
use meilisearch_error::{ErrorCode, Code};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ResponseError {
|
||||
inner: Box<dyn ErrorCode>,
|
||||
}
|
||||
|
||||
impl error::Error for ResponseError {}
|
||||
|
||||
impl ErrorCode for ResponseError {
|
||||
fn error_code(&self) -> Code {
|
||||
self.inner.error_code()
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for ResponseError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
self.inner.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Error> for ResponseError {
|
||||
fn from(error: Error) -> ResponseError {
|
||||
ResponseError { inner: Box::new(error) }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<meilisearch_core::Error> for ResponseError {
|
||||
fn from(err: meilisearch_core::Error) -> ResponseError {
|
||||
ResponseError { inner: Box::new(err) }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<meilisearch_schema::Error> for ResponseError {
|
||||
fn from(err: meilisearch_schema::Error) -> ResponseError {
|
||||
ResponseError { inner: Box::new(err) }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<FacetCountError> for ResponseError {
|
||||
fn from(err: FacetCountError) -> ResponseError {
|
||||
ResponseError { inner: Box::new(err) }
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for ResponseError {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
let struct_name = "ResponseError";
|
||||
let field_count = 4;
|
||||
|
||||
let mut state = serializer.serialize_struct(struct_name, field_count)?;
|
||||
state.serialize_field("message", &self.to_string())?;
|
||||
state.serialize_field("errorCode", &self.error_name())?;
|
||||
state.serialize_field("errorType", &self.error_type())?;
|
||||
state.serialize_field("errorLink", &self.error_url())?;
|
||||
state.end()
|
||||
}
|
||||
}
|
||||
|
||||
impl aweb::error::ResponseError for ResponseError {
|
||||
fn error_response(&self) -> aweb::HttpResponse {
|
||||
ResponseBuilder::new(self.status_code()).json(&self)
|
||||
}
|
||||
|
||||
fn status_code(&self) -> StatusCode {
|
||||
self.http_status()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
BadParameter(String, String),
|
||||
BadRequest(String),
|
||||
CreateIndex(String),
|
||||
DocumentNotFound(String),
|
||||
IndexNotFound(String),
|
||||
IndexAlreadyExists(String),
|
||||
Internal(String),
|
||||
InvalidIndexUid,
|
||||
InvalidToken(String),
|
||||
MissingAuthorizationHeader,
|
||||
NotFound(String),
|
||||
OpenIndex(String),
|
||||
RetrieveDocument(u32, String),
|
||||
SearchDocuments(String),
|
||||
PayloadTooLarge,
|
||||
UnsupportedMediaType,
|
||||
DumpAlreadyInProgress,
|
||||
DumpProcessFailed(String),
|
||||
}
|
||||
|
||||
impl error::Error for Error {}
|
||||
|
||||
impl ErrorCode for Error {
|
||||
fn error_code(&self) -> Code {
|
||||
use Error::*;
|
||||
match self {
|
||||
BadParameter(_, _) => Code::BadParameter,
|
||||
BadRequest(_) => Code::BadRequest,
|
||||
CreateIndex(_) => Code::CreateIndex,
|
||||
DocumentNotFound(_) => Code::DocumentNotFound,
|
||||
IndexNotFound(_) => Code::IndexNotFound,
|
||||
IndexAlreadyExists(_) => Code::IndexAlreadyExists,
|
||||
Internal(_) => Code::Internal,
|
||||
InvalidIndexUid => Code::InvalidIndexUid,
|
||||
InvalidToken(_) => Code::InvalidToken,
|
||||
MissingAuthorizationHeader => Code::MissingAuthorizationHeader,
|
||||
NotFound(_) => Code::NotFound,
|
||||
OpenIndex(_) => Code::OpenIndex,
|
||||
RetrieveDocument(_, _) => Code::RetrieveDocument,
|
||||
SearchDocuments(_) => Code::SearchDocuments,
|
||||
PayloadTooLarge => Code::PayloadTooLarge,
|
||||
UnsupportedMediaType => Code::UnsupportedMediaType,
|
||||
DumpAlreadyInProgress => Code::DumpAlreadyInProgress,
|
||||
DumpProcessFailed(_) => Code::DumpProcessFailed,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum FacetCountError {
|
||||
AttributeNotSet(String),
|
||||
SyntaxError(String),
|
||||
UnexpectedToken { found: String, expected: &'static [&'static str] },
|
||||
NoFacetSet,
|
||||
}
|
||||
|
||||
impl error::Error for FacetCountError {}
|
||||
|
||||
impl ErrorCode for FacetCountError {
|
||||
fn error_code(&self) -> Code {
|
||||
Code::BadRequest
|
||||
}
|
||||
}
|
||||
|
||||
impl FacetCountError {
|
||||
pub fn unexpected_token(found: impl ToString, expected: &'static [&'static str]) -> FacetCountError {
|
||||
let found = found.to_string();
|
||||
FacetCountError::UnexpectedToken { expected, found }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<serde_json::error::Error> for FacetCountError {
|
||||
fn from(other: serde_json::error::Error) -> FacetCountError {
|
||||
FacetCountError::SyntaxError(other.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for FacetCountError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
use FacetCountError::*;
|
||||
|
||||
match self {
|
||||
AttributeNotSet(attr) => write!(f, "Attribute {} is not set as facet", attr),
|
||||
SyntaxError(msg) => write!(f, "Syntax error: {}", msg),
|
||||
UnexpectedToken { expected, found } => write!(f, "Unexpected {} found, expected {:?}", found, expected),
|
||||
NoFacetSet => write!(f, "Can't perform facet count, as no facet is set"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Error {
|
||||
pub fn internal(err: impl fmt::Display) -> Error {
|
||||
Error::Internal(err.to_string())
|
||||
}
|
||||
|
||||
pub fn bad_request(err: impl fmt::Display) -> Error {
|
||||
Error::BadRequest(err.to_string())
|
||||
}
|
||||
|
||||
pub fn missing_authorization_header() -> Error {
|
||||
Error::MissingAuthorizationHeader
|
||||
}
|
||||
|
||||
pub fn invalid_token(err: impl fmt::Display) -> Error {
|
||||
Error::InvalidToken(err.to_string())
|
||||
}
|
||||
|
||||
pub fn not_found(err: impl fmt::Display) -> Error {
|
||||
Error::NotFound(err.to_string())
|
||||
}
|
||||
|
||||
pub fn index_not_found(err: impl fmt::Display) -> Error {
|
||||
Error::IndexNotFound(err.to_string())
|
||||
}
|
||||
|
||||
pub fn document_not_found(err: impl fmt::Display) -> Error {
|
||||
Error::DocumentNotFound(err.to_string())
|
||||
}
|
||||
|
||||
pub fn bad_parameter(param: impl fmt::Display, err: impl fmt::Display) -> Error {
|
||||
Error::BadParameter(param.to_string(), err.to_string())
|
||||
}
|
||||
|
||||
pub fn open_index(err: impl fmt::Display) -> Error {
|
||||
Error::OpenIndex(err.to_string())
|
||||
}
|
||||
|
||||
pub fn create_index(err: impl fmt::Display) -> Error {
|
||||
Error::CreateIndex(err.to_string())
|
||||
}
|
||||
|
||||
pub fn invalid_index_uid() -> Error {
|
||||
Error::InvalidIndexUid
|
||||
}
|
||||
|
||||
pub fn retrieve_document(doc_id: u32, err: impl fmt::Display) -> Error {
|
||||
Error::RetrieveDocument(doc_id, err.to_string())
|
||||
}
|
||||
|
||||
pub fn search_documents(err: impl fmt::Display) -> Error {
|
||||
Error::SearchDocuments(err.to_string())
|
||||
}
|
||||
|
||||
pub fn dump_conflict() -> Error {
|
||||
Error::DumpAlreadyInProgress
|
||||
}
|
||||
|
||||
pub fn dump_failed(message: String) -> Error {
|
||||
Error::DumpProcessFailed(message)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::BadParameter(param, err) => write!(f, "Url parameter {} error: {}", param, err),
|
||||
Self::BadRequest(err) => f.write_str(err),
|
||||
Self::CreateIndex(err) => write!(f, "Impossible to create index; {}", err),
|
||||
Self::DocumentNotFound(document_id) => write!(f, "Document with id {} not found", document_id),
|
||||
Self::IndexNotFound(index_uid) => write!(f, "Index {} not found", index_uid),
|
||||
Self::IndexAlreadyExists(index_uid) => write!(f, "Index {} already exists", index_uid),
|
||||
Self::Internal(err) => f.write_str(err),
|
||||
Self::InvalidIndexUid => f.write_str("Index must have a valid uid; Index uid can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_)."),
|
||||
Self::InvalidToken(err) => write!(f, "Invalid API key: {}", err),
|
||||
Self::MissingAuthorizationHeader => f.write_str("You must have an authorization token"),
|
||||
Self::NotFound(err) => write!(f, "{} not found", err),
|
||||
Self::OpenIndex(err) => write!(f, "Impossible to open index; {}", err),
|
||||
Self::RetrieveDocument(id, err) => write!(f, "Impossible to retrieve the document with id: {}; {}", id, err),
|
||||
Self::SearchDocuments(err) => write!(f, "Impossible to search documents; {}", err),
|
||||
Self::PayloadTooLarge => f.write_str("Payload too large"),
|
||||
Self::UnsupportedMediaType => f.write_str("Unsupported media type"),
|
||||
Self::DumpAlreadyInProgress => f.write_str("Another dump is already in progress"),
|
||||
Self::DumpProcessFailed(message) => write!(f, "Dump process failed: {}", message),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for Error {
|
||||
fn from(err: std::io::Error) -> Error {
|
||||
Error::Internal(err.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<actix_http::Error> for Error {
|
||||
fn from(err: actix_http::Error) -> Error {
|
||||
Error::Internal(err.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<meilisearch_core::Error> for Error {
|
||||
fn from(err: meilisearch_core::Error) -> Error {
|
||||
Error::Internal(err.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<serde_json::error::Error> for Error {
|
||||
fn from(err: serde_json::error::Error) -> Error {
|
||||
Error::Internal(err.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<JsonPayloadError> for Error {
|
||||
fn from(err: JsonPayloadError) -> Error {
|
||||
match err {
|
||||
JsonPayloadError::Deserialize(err) => Error::BadRequest(format!("Invalid JSON: {}", err)),
|
||||
JsonPayloadError::Overflow => Error::PayloadTooLarge,
|
||||
JsonPayloadError::ContentType => Error::UnsupportedMediaType,
|
||||
JsonPayloadError::Payload(err) => Error::BadRequest(format!("Problem while decoding the request: {}", err)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<QueryPayloadError> for Error {
|
||||
fn from(err: QueryPayloadError) -> Error {
|
||||
match err {
|
||||
QueryPayloadError::Deserialize(err) => Error::BadRequest(format!("Invalid query parameters: {}", err)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn payload_error_handler<E: Into<Error>>(err: E) -> ResponseError {
|
||||
let error: Error = err.into();
|
||||
error.into()
|
||||
}
|
@ -1,107 +0,0 @@
|
||||
use std::cell::RefCell;
|
||||
use std::pin::Pin;
|
||||
use std::rc::Rc;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use actix_service::{Service, Transform};
|
||||
use actix_web::{dev::ServiceRequest, dev::ServiceResponse, web};
|
||||
use futures::future::{err, ok, Future, Ready};
|
||||
use actix_web::error::ResponseError as _;
|
||||
use actix_web::dev::Body;
|
||||
|
||||
use crate::error::{Error, ResponseError};
|
||||
use crate::Data;
|
||||
|
||||
/// Access level required by a route, enforced by `LoggingMiddleware`.
#[derive(Clone)]
pub enum Authentication {
    /// Accepts the master, private or public key.
    Public,
    /// Accepts the master or private key.
    Private,
    /// Accepts the master key only.
    Admin,
}
|
||||
|
||||
impl<S: 'static> Transform<S> for Authentication
|
||||
where
|
||||
S: Service<Request = ServiceRequest, Response = ServiceResponse<Body>, Error = actix_web::Error>,
|
||||
S::Future: 'static,
|
||||
{
|
||||
type Request = ServiceRequest;
|
||||
type Response = ServiceResponse<Body>;
|
||||
type Error = actix_web::Error;
|
||||
type InitError = ();
|
||||
type Transform = LoggingMiddleware<S>;
|
||||
type Future = Ready<Result<Self::Transform, Self::InitError>>;
|
||||
|
||||
fn new_transform(&self, service: S) -> Self::Future {
|
||||
ok(LoggingMiddleware {
|
||||
acl: self.clone(),
|
||||
service: Rc::new(RefCell::new(service)),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub struct LoggingMiddleware<S> {
|
||||
acl: Authentication,
|
||||
service: Rc<RefCell<S>>,
|
||||
}
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
impl<S> Service for LoggingMiddleware<S>
|
||||
where
|
||||
S: Service<Request = ServiceRequest, Response = ServiceResponse<Body>, Error = actix_web::Error> + 'static,
|
||||
S::Future: 'static,
|
||||
{
|
||||
type Request = ServiceRequest;
|
||||
type Response = ServiceResponse<Body>;
|
||||
type Error = actix_web::Error;
|
||||
type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>>>>;
|
||||
|
||||
fn poll_ready(&mut self, cx: &mut Context) -> Poll<Result<(), Self::Error>> {
|
||||
self.service.poll_ready(cx)
|
||||
}
|
||||
|
||||
fn call(&mut self, req: ServiceRequest) -> Self::Future {
|
||||
let mut svc = self.service.clone();
|
||||
// This unwrap is left because this error should never appear. If that's the case, then
|
||||
// it means that actix-web has an issue or someone changes the type `Data`.
|
||||
let data = req.app_data::<web::Data<Data>>().unwrap();
|
||||
|
||||
if data.api_keys.master.is_none() {
|
||||
return Box::pin(svc.call(req));
|
||||
}
|
||||
|
||||
let auth_header = match req.headers().get("X-Meili-API-Key") {
|
||||
Some(auth) => match auth.to_str() {
|
||||
Ok(auth) => auth,
|
||||
Err(_) => {
|
||||
let error = ResponseError::from(Error::MissingAuthorizationHeader).error_response();
|
||||
let (request, _) = req.into_parts();
|
||||
return Box::pin(ok(ServiceResponse::new(request, error)))
|
||||
}
|
||||
},
|
||||
None => {
|
||||
return Box::pin(err(ResponseError::from(Error::MissingAuthorizationHeader).into()));
|
||||
}
|
||||
};
|
||||
|
||||
let authenticated = match self.acl {
|
||||
Authentication::Admin => data.api_keys.master.as_deref() == Some(auth_header),
|
||||
Authentication::Private => {
|
||||
data.api_keys.master.as_deref() == Some(auth_header)
|
||||
|| data.api_keys.private.as_deref() == Some(auth_header)
|
||||
}
|
||||
Authentication::Public => {
|
||||
data.api_keys.master.as_deref() == Some(auth_header)
|
||||
|| data.api_keys.private.as_deref() == Some(auth_header)
|
||||
|| data.api_keys.public.as_deref() == Some(auth_header)
|
||||
}
|
||||
};
|
||||
|
||||
if authenticated {
|
||||
Box::pin(svc.call(req))
|
||||
} else {
|
||||
let error = ResponseError::from(Error::InvalidToken(auth_header.to_string())).error_response();
|
||||
let (request, _) = req.into_parts();
|
||||
Box::pin(ok(ServiceResponse::new(request, error)))
|
||||
}
|
||||
}
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
use flate2::Compression;
|
||||
use flate2::read::GzDecoder;
|
||||
use flate2::write::GzEncoder;
|
||||
use std::fs::{create_dir_all, rename, File};
|
||||
use std::path::Path;
|
||||
use tar::{Builder, Archive};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::Error;
|
||||
|
||||
pub fn to_tar_gz(src: &Path, dest: &Path) -> Result<(), Error> {
|
||||
let file_name = format!(".{}", Uuid::new_v4().to_urn());
|
||||
let p = dest.with_file_name(file_name);
|
||||
let tmp_dest = p.as_path();
|
||||
|
||||
let f = File::create(tmp_dest)?;
|
||||
let gz_encoder = GzEncoder::new(f, Compression::default());
|
||||
let mut tar_encoder = Builder::new(gz_encoder);
|
||||
tar_encoder.append_dir_all(".", src)?;
|
||||
let gz_encoder = tar_encoder.into_inner()?;
|
||||
gz_encoder.finish()?;
|
||||
|
||||
rename(tmp_dest, dest)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn from_tar_gz(src: &Path, dest: &Path) -> Result<(), Error> {
|
||||
let f = File::open(src)?;
|
||||
let gz = GzDecoder::new(f);
|
||||
let mut ar = Archive::new(gz);
|
||||
create_dir_all(dest)?;
|
||||
ar.unpack(dest)?;
|
||||
Ok(())
|
||||
}
|
@ -1,649 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::time::Instant;
|
||||
|
||||
use indexmap::IndexMap;
|
||||
use log::error;
|
||||
use meilisearch_core::{Filter, MainReader};
|
||||
use meilisearch_core::facets::FacetFilter;
|
||||
use meilisearch_core::criterion::*;
|
||||
use meilisearch_core::settings::RankingRule;
|
||||
use meilisearch_core::{Highlight, Index, RankedMap};
|
||||
use meilisearch_schema::{FieldId, Schema};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use siphasher::sip::SipHasher;
|
||||
use slice_group_by::GroupBy;
|
||||
|
||||
use crate::error::{Error, ResponseError};
|
||||
|
||||
pub trait IndexSearchExt {
|
||||
fn new_search(&self, query: Option<String>) -> SearchBuilder;
|
||||
}
|
||||
|
||||
impl IndexSearchExt for Index {
|
||||
fn new_search(&self, query: Option<String>) -> SearchBuilder {
|
||||
SearchBuilder {
|
||||
index: self,
|
||||
query,
|
||||
offset: 0,
|
||||
limit: 20,
|
||||
attributes_to_crop: None,
|
||||
attributes_to_retrieve: None,
|
||||
attributes_to_highlight: None,
|
||||
filters: None,
|
||||
matches: false,
|
||||
facet_filters: None,
|
||||
facets: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SearchBuilder<'a> {
|
||||
index: &'a Index,
|
||||
query: Option<String>,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_crop: Option<HashMap<String, usize>>,
|
||||
attributes_to_retrieve: Option<HashSet<String>>,
|
||||
attributes_to_highlight: Option<HashSet<String>>,
|
||||
filters: Option<String>,
|
||||
matches: bool,
|
||||
facet_filters: Option<FacetFilter>,
|
||||
facets: Option<Vec<(FieldId, String)>>
|
||||
}
|
||||
|
||||
impl<'a> SearchBuilder<'a> {
|
||||
pub fn offset(&mut self, value: usize) -> &SearchBuilder {
|
||||
self.offset = value;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn limit(&mut self, value: usize) -> &SearchBuilder {
|
||||
self.limit = value;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn attributes_to_crop(&mut self, value: HashMap<String, usize>) -> &SearchBuilder {
|
||||
self.attributes_to_crop = Some(value);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn attributes_to_retrieve(&mut self, value: HashSet<String>) -> &SearchBuilder {
|
||||
self.attributes_to_retrieve = Some(value);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn add_retrievable_field(&mut self, value: String) -> &SearchBuilder {
|
||||
let attributes_to_retrieve = self.attributes_to_retrieve.get_or_insert(HashSet::new());
|
||||
attributes_to_retrieve.insert(value);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn attributes_to_highlight(&mut self, value: HashSet<String>) -> &SearchBuilder {
|
||||
self.attributes_to_highlight = Some(value);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn add_facet_filters(&mut self, filters: FacetFilter) -> &SearchBuilder {
|
||||
self.facet_filters = Some(filters);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn filters(&mut self, value: String) -> &SearchBuilder {
|
||||
self.filters = Some(value);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn get_matches(&mut self) -> &SearchBuilder {
|
||||
self.matches = true;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn add_facets(&mut self, facets: Vec<(FieldId, String)>) -> &SearchBuilder {
|
||||
self.facets = Some(facets);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn search(self, reader: &MainReader) -> Result<SearchResult, ResponseError> {
|
||||
let schema = self
|
||||
.index
|
||||
.main
|
||||
.schema(reader)?
|
||||
.ok_or(Error::internal("missing schema"))?;
|
||||
|
||||
let ranked_map = self.index.main.ranked_map(reader)?.unwrap_or_default();
|
||||
|
||||
// Change criteria
|
||||
let mut query_builder = match self.get_criteria(reader, &ranked_map, &schema)? {
|
||||
Some(criteria) => self.index.query_builder_with_criteria(criteria),
|
||||
None => self.index.query_builder(),
|
||||
};
|
||||
|
||||
if let Some(filter_expression) = &self.filters {
|
||||
let filter = Filter::parse(filter_expression, &schema)?;
|
||||
let index = &self.index;
|
||||
query_builder.with_filter(move |id| {
|
||||
let reader = &reader;
|
||||
let filter = &filter;
|
||||
match filter.test(reader, index, id) {
|
||||
Ok(res) => res,
|
||||
Err(e) => {
|
||||
log::warn!("unexpected error during filtering: {}", e);
|
||||
false
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(field) = self.index.main.distinct_attribute(reader)? {
|
||||
let index = &self.index;
|
||||
query_builder.with_distinct(1, move |id| {
|
||||
match index.document_attribute_bytes(reader, id, field) {
|
||||
Ok(Some(bytes)) => {
|
||||
let mut s = SipHasher::new();
|
||||
bytes.hash(&mut s);
|
||||
Some(s.finish())
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
query_builder.set_facet_filter(self.facet_filters);
|
||||
query_builder.set_facets(self.facets);
|
||||
|
||||
let start = Instant::now();
|
||||
let result = query_builder.query(reader, self.query.as_deref(), self.offset..(self.offset + self.limit));
|
||||
let search_result = result.map_err(Error::search_documents)?;
|
||||
let time_ms = start.elapsed().as_millis() as usize;
|
||||
|
||||
let mut all_attributes: HashSet<&str> = HashSet::new();
|
||||
let mut all_formatted: HashSet<&str> = HashSet::new();
|
||||
|
||||
match &self.attributes_to_retrieve {
|
||||
Some(to_retrieve) => {
|
||||
all_attributes.extend(to_retrieve.iter().map(String::as_str));
|
||||
|
||||
if let Some(to_highlight) = &self.attributes_to_highlight {
|
||||
all_formatted.extend(to_highlight.iter().map(String::as_str));
|
||||
}
|
||||
|
||||
if let Some(to_crop) = &self.attributes_to_crop {
|
||||
all_formatted.extend(to_crop.keys().map(String::as_str));
|
||||
}
|
||||
|
||||
all_attributes.extend(&all_formatted);
|
||||
},
|
||||
None => {
|
||||
all_attributes.extend(schema.displayed_names());
|
||||
// If we specified at least one attribute to highlight or crop then
|
||||
// all available attributes will be returned in the _formatted field.
|
||||
if self.attributes_to_highlight.is_some() || self.attributes_to_crop.is_some() {
|
||||
all_formatted.extend(all_attributes.iter().cloned());
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
let mut hits = Vec::with_capacity(self.limit);
|
||||
for doc in search_result.documents {
|
||||
let mut document: IndexMap<String, Value> = self
|
||||
.index
|
||||
.document(reader, Some(&all_attributes), doc.id)
|
||||
.map_err(|e| Error::retrieve_document(doc.id.0, e))?
|
||||
.unwrap_or_default();
|
||||
|
||||
let mut formatted = document.iter()
|
||||
.filter(|(key, _)| all_formatted.contains(key.as_str()))
|
||||
.map(|(k, v)| (k.clone(), v.clone()))
|
||||
.collect();
|
||||
|
||||
let mut matches = doc.highlights.clone();
|
||||
|
||||
// Crops fields if needed
|
||||
if let Some(fields) = &self.attributes_to_crop {
|
||||
crop_document(&mut formatted, &mut matches, &schema, fields);
|
||||
}
|
||||
|
||||
// Transform to readable matches
|
||||
if let Some(attributes_to_highlight) = &self.attributes_to_highlight {
|
||||
let matches = calculate_matches(
|
||||
&matches,
|
||||
self.attributes_to_highlight.clone(),
|
||||
&schema,
|
||||
);
|
||||
formatted = calculate_highlights(&formatted, &matches, attributes_to_highlight);
|
||||
}
|
||||
|
||||
let matches_info = if self.matches {
|
||||
Some(calculate_matches(&matches, self.attributes_to_retrieve.clone(), &schema))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if let Some(attributes_to_retrieve) = &self.attributes_to_retrieve {
|
||||
document.retain(|key, _| attributes_to_retrieve.contains(&key.to_string()))
|
||||
}
|
||||
|
||||
let hit = SearchHit {
|
||||
document,
|
||||
formatted,
|
||||
matches_info,
|
||||
};
|
||||
|
||||
hits.push(hit);
|
||||
}
|
||||
|
||||
let results = SearchResult {
|
||||
hits,
|
||||
offset: self.offset,
|
||||
limit: self.limit,
|
||||
nb_hits: search_result.nb_hits,
|
||||
exhaustive_nb_hits: search_result.exhaustive_nb_hit,
|
||||
processing_time_ms: time_ms,
|
||||
query: self.query.unwrap_or_default(),
|
||||
facets_distribution: search_result.facets,
|
||||
exhaustive_facets_count: search_result.exhaustive_facets_count,
|
||||
};
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
pub fn get_criteria(
|
||||
&self,
|
||||
reader: &MainReader,
|
||||
ranked_map: &'a RankedMap,
|
||||
schema: &Schema,
|
||||
) -> Result<Option<Criteria<'a>>, ResponseError> {
|
||||
let ranking_rules = self.index.main.ranking_rules(reader)?;
|
||||
|
||||
if let Some(ranking_rules) = ranking_rules {
|
||||
let mut builder = CriteriaBuilder::with_capacity(7 + ranking_rules.len());
|
||||
for rule in ranking_rules {
|
||||
match rule {
|
||||
RankingRule::Typo => builder.push(Typo),
|
||||
RankingRule::Words => builder.push(Words),
|
||||
RankingRule::Proximity => builder.push(Proximity),
|
||||
RankingRule::Attribute => builder.push(Attribute),
|
||||
RankingRule::WordsPosition => builder.push(WordsPosition),
|
||||
RankingRule::Exactness => builder.push(Exactness),
|
||||
RankingRule::Asc(field) => {
|
||||
match SortByAttr::lower_is_better(&ranked_map, &schema, &field) {
|
||||
Ok(rule) => builder.push(rule),
|
||||
Err(err) => error!("Error during criteria builder; {:?}", err),
|
||||
}
|
||||
}
|
||||
RankingRule::Desc(field) => {
|
||||
match SortByAttr::higher_is_better(&ranked_map, &schema, &field) {
|
||||
Ok(rule) => builder.push(rule),
|
||||
Err(err) => error!("Error during criteria builder; {:?}", err),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
builder.push(DocumentId);
|
||||
return Ok(Some(builder.build()));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
|
||||
pub struct MatchPosition {
|
||||
pub start: usize,
|
||||
pub length: usize,
|
||||
}
|
||||
|
||||
impl PartialOrd for MatchPosition {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for MatchPosition {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
match self.start.cmp(&other.start) {
|
||||
Ordering::Equal => self.length.cmp(&other.length),
|
||||
_ => self.start.cmp(&other.start),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub type HighlightInfos = HashMap<String, Value>;
|
||||
pub type MatchesInfos = HashMap<String, Vec<MatchPosition>>;
|
||||
// pub type RankingInfos = HashMap<String, u64>;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SearchHit {
|
||||
#[serde(flatten)]
|
||||
pub document: IndexMap<String, Value>,
|
||||
#[serde(rename = "_formatted", skip_serializing_if = "IndexMap::is_empty")]
|
||||
pub formatted: IndexMap<String, Value>,
|
||||
#[serde(rename = "_matchesInfo", skip_serializing_if = "Option::is_none")]
|
||||
pub matches_info: Option<MatchesInfos>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SearchResult {
|
||||
pub hits: Vec<SearchHit>,
|
||||
pub offset: usize,
|
||||
pub limit: usize,
|
||||
pub nb_hits: usize,
|
||||
pub exhaustive_nb_hits: bool,
|
||||
pub processing_time_ms: usize,
|
||||
pub query: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub facets_distribution: Option<HashMap<String, HashMap<String, usize>>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub exhaustive_facets_count: Option<bool>,
|
||||
}
|
||||
|
||||
/// returns the start index and the length on the crop.
|
||||
fn aligned_crop(text: &str, match_index: usize, context: usize) -> (usize, usize) {
|
||||
let is_word_component = |c: &char| c.is_alphanumeric() && !super::is_cjk(*c);
|
||||
|
||||
let word_end_index = |mut index| {
|
||||
if text.chars().nth(index - 1).map_or(false, |c| is_word_component(&c)) {
|
||||
index += text.chars().skip(index).take_while(is_word_component).count();
|
||||
}
|
||||
index
|
||||
};
|
||||
|
||||
if context == 0 {
|
||||
// count need to be at least 1 for cjk queries to return something
|
||||
return (match_index, 1 + text.chars().skip(match_index).take_while(is_word_component).count());
|
||||
}
|
||||
let start = match match_index.saturating_sub(context) {
|
||||
0 => 0,
|
||||
n => {
|
||||
let word_end_index = word_end_index(n);
|
||||
// skip whitespaces if any
|
||||
word_end_index + text.chars().skip(word_end_index).take_while(char::is_ascii_whitespace).count()
|
||||
}
|
||||
};
|
||||
let end = word_end_index(match_index + context);
|
||||
|
||||
(start, end - start)
|
||||
}
|
||||
|
||||
fn crop_text(
|
||||
text: &str,
|
||||
matches: impl IntoIterator<Item = Highlight>,
|
||||
context: usize,
|
||||
) -> (String, Vec<Highlight>) {
|
||||
let mut matches = matches.into_iter().peekable();
|
||||
|
||||
let char_index = matches.peek().map(|m| m.char_index as usize).unwrap_or(0);
|
||||
let (start, count) = aligned_crop(text, char_index, context);
|
||||
|
||||
// TODO do something about double allocation
|
||||
let text = text
|
||||
.chars()
|
||||
.skip(start)
|
||||
.take(count)
|
||||
.collect::<String>()
|
||||
.trim()
|
||||
.to_string();
|
||||
|
||||
// update matches index to match the new cropped text
|
||||
let matches = matches
|
||||
.take_while(|m| (m.char_index as usize) + (m.char_length as usize) <= start + count)
|
||||
.map(|m| Highlight {
|
||||
char_index: m.char_index - start as u16,
|
||||
..m
|
||||
})
|
||||
.collect();
|
||||
|
||||
(text, matches)
|
||||
}
|
||||
|
||||
fn crop_document(
|
||||
document: &mut IndexMap<String, Value>,
|
||||
matches: &mut Vec<Highlight>,
|
||||
schema: &Schema,
|
||||
fields: &HashMap<String, usize>,
|
||||
) {
|
||||
matches.sort_unstable_by_key(|m| (m.char_index, m.char_length));
|
||||
|
||||
for (field, length) in fields {
|
||||
let attribute = match schema.id(field) {
|
||||
Some(attribute) => attribute,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let selected_matches = matches
|
||||
.iter()
|
||||
.filter(|m| FieldId::new(m.attribute) == attribute)
|
||||
.cloned();
|
||||
|
||||
if let Some(Value::String(ref mut original_text)) = document.get_mut(field) {
|
||||
let (cropped_text, cropped_matches) =
|
||||
crop_text(original_text, selected_matches, *length);
|
||||
|
||||
*original_text = cropped_text;
|
||||
|
||||
matches.retain(|m| FieldId::new(m.attribute) != attribute);
|
||||
matches.extend_from_slice(&cropped_matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn calculate_matches(
|
||||
matches: &[Highlight],
|
||||
attributes_to_retrieve: Option<HashSet<String>>,
|
||||
schema: &Schema,
|
||||
) -> MatchesInfos {
|
||||
let mut matches_result: HashMap<String, Vec<MatchPosition>> = HashMap::new();
|
||||
for m in matches.iter() {
|
||||
if let Some(attribute) = schema.name(FieldId::new(m.attribute)) {
|
||||
if let Some(ref attributes_to_retrieve) = attributes_to_retrieve {
|
||||
if !attributes_to_retrieve.contains(attribute) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if !schema.displayed_names().contains(&attribute) {
|
||||
continue;
|
||||
}
|
||||
if let Some(pos) = matches_result.get_mut(attribute) {
|
||||
pos.push(MatchPosition {
|
||||
start: m.char_index as usize,
|
||||
length: m.char_length as usize,
|
||||
});
|
||||
} else {
|
||||
let mut positions = Vec::new();
|
||||
positions.push(MatchPosition {
|
||||
start: m.char_index as usize,
|
||||
length: m.char_length as usize,
|
||||
});
|
||||
matches_result.insert(attribute.to_string(), positions);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (_, val) in matches_result.iter_mut() {
|
||||
val.sort_unstable();
|
||||
val.dedup();
|
||||
}
|
||||
matches_result
|
||||
}
|
||||
|
||||
fn calculate_highlights(
|
||||
document: &IndexMap<String, Value>,
|
||||
matches: &MatchesInfos,
|
||||
attributes_to_highlight: &HashSet<String>,
|
||||
) -> IndexMap<String, Value> {
|
||||
let mut highlight_result = document.clone();
|
||||
|
||||
for (attribute, matches) in matches.iter() {
|
||||
if attributes_to_highlight.contains(attribute) {
|
||||
if let Some(Value::String(value)) = document.get(attribute) {
|
||||
let value = value;
|
||||
let mut highlighted_value = String::new();
|
||||
let mut index = 0;
|
||||
|
||||
let longest_matches = matches
|
||||
.linear_group_by_key(|m| m.start)
|
||||
.map(|group| group.last().unwrap())
|
||||
.filter(move |m| m.start >= index);
|
||||
|
||||
for m in longest_matches {
|
||||
let before = value.get(index..m.start);
|
||||
let highlighted = value.get(m.start..(m.start + m.length));
|
||||
if let (Some(before), Some(highlighted)) = (before, highlighted) {
|
||||
highlighted_value.push_str(before);
|
||||
highlighted_value.push_str("<em>");
|
||||
highlighted_value.push_str(highlighted);
|
||||
highlighted_value.push_str("</em>");
|
||||
index = m.start + m.length;
|
||||
} else {
|
||||
error!("value: {:?}; index: {:?}, match: {:?}", value, index, m);
|
||||
}
|
||||
}
|
||||
highlighted_value.push_str(&value[index..]);
|
||||
highlight_result.insert(attribute.to_string(), Value::String(highlighted_value));
|
||||
};
|
||||
}
|
||||
}
|
||||
highlight_result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn aligned_crops() {
|
||||
let text = r#"En ce début de trentième millénaire, l'Empire n'a jamais été aussi puissant, aussi étendu à travers toute la galaxie. C'est dans sa capitale, Trantor, que l'éminent savant Hari Seldon invente la psychohistoire, une science toute nouvelle, à base de psychologie et de mathématiques, qui lui permet de prédire l'avenir... C'est-à-dire l'effondrement de l'Empire d'ici cinq siècles et au-delà, trente mille années de chaos et de ténèbres. Pour empêcher cette catastrophe et sauver la civilisation, Seldon crée la Fondation."#;
|
||||
|
||||
// simple test
|
||||
let (start, length) = aligned_crop(&text, 6, 2);
|
||||
let cropped = text.chars().skip(start).take(length).collect::<String>().trim().to_string();
|
||||
assert_eq!("début", cropped);
|
||||
|
||||
// first word test
|
||||
let (start, length) = aligned_crop(&text, 0, 1);
|
||||
let cropped = text.chars().skip(start).take(length).collect::<String>().trim().to_string();
|
||||
assert_eq!("En", cropped);
|
||||
// last word test
|
||||
let (start, length) = aligned_crop(&text, 510, 2);
|
||||
let cropped = text.chars().skip(start).take(length).collect::<String>().trim().to_string();
|
||||
assert_eq!("Fondation", cropped);
|
||||
|
||||
// CJK tests
|
||||
let text = "this isのス foo myタイリ test";
|
||||
|
||||
// mixed charset
|
||||
let (start, length) = aligned_crop(&text, 5, 3);
|
||||
let cropped = text.chars().skip(start).take(length).collect::<String>().trim().to_string();
|
||||
assert_eq!("isの", cropped);
|
||||
|
||||
// split regular word / CJK word, no space
|
||||
let (start, length) = aligned_crop(&text, 7, 1);
|
||||
let cropped = text.chars().skip(start).take(length).collect::<String>().trim().to_string();
|
||||
assert_eq!("の", cropped);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn calculate_matches() {
|
||||
let mut matches = Vec::new();
|
||||
matches.push(Highlight { attribute: 0, char_index: 0, char_length: 3});
|
||||
matches.push(Highlight { attribute: 0, char_index: 0, char_length: 2});
|
||||
|
||||
let mut attributes_to_retrieve: HashSet<String> = HashSet::new();
|
||||
attributes_to_retrieve.insert("title".to_string());
|
||||
|
||||
let schema = Schema::with_primary_key("title");
|
||||
|
||||
let matches_result = super::calculate_matches(&matches, Some(attributes_to_retrieve), &schema);
|
||||
|
||||
let mut matches_result_expected: HashMap<String, Vec<MatchPosition>> = HashMap::new();
|
||||
|
||||
let mut positions = Vec::new();
|
||||
positions.push(MatchPosition {
|
||||
start: 0,
|
||||
length: 2,
|
||||
});
|
||||
positions.push(MatchPosition {
|
||||
start: 0,
|
||||
length: 3,
|
||||
});
|
||||
matches_result_expected.insert("title".to_string(), positions);
|
||||
|
||||
assert_eq!(matches_result, matches_result_expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn calculate_highlights() {
|
||||
let data = r#"{
|
||||
"title": "Fondation (Isaac ASIMOV)",
|
||||
"description": "En ce début de trentième millénaire, l'Empire n'a jamais été aussi puissant, aussi étendu à travers toute la galaxie. C'est dans sa capitale, Trantor, que l'éminent savant Hari Seldon invente la psychohistoire, une science toute nouvelle, à base de psychologie et de mathématiques, qui lui permet de prédire l'avenir... C'est-à-dire l'effondrement de l'Empire d'ici cinq siècles et au-delà, trente mille années de chaos et de ténèbres. Pour empêcher cette catastrophe et sauver la civilisation, Seldon crée la Fondation."
|
||||
}"#;
|
||||
|
||||
let document: IndexMap<String, Value> = serde_json::from_str(data).unwrap();
|
||||
let mut attributes_to_highlight = HashSet::new();
|
||||
attributes_to_highlight.insert("title".to_string());
|
||||
attributes_to_highlight.insert("description".to_string());
|
||||
|
||||
let mut matches = HashMap::new();
|
||||
|
||||
let mut m = Vec::new();
|
||||
m.push(MatchPosition {
|
||||
start: 0,
|
||||
length: 9,
|
||||
});
|
||||
matches.insert("title".to_string(), m);
|
||||
|
||||
let mut m = Vec::new();
|
||||
m.push(MatchPosition {
|
||||
start: 529,
|
||||
length: 9,
|
||||
});
|
||||
matches.insert("description".to_string(), m);
|
||||
let result = super::calculate_highlights(&document, &matches, &attributes_to_highlight);
|
||||
|
||||
let mut result_expected = IndexMap::new();
|
||||
result_expected.insert(
|
||||
"title".to_string(),
|
||||
Value::String("<em>Fondation</em> (Isaac ASIMOV)".to_string()),
|
||||
);
|
||||
result_expected.insert("description".to_string(), Value::String("En ce début de trentième millénaire, l'Empire n'a jamais été aussi puissant, aussi étendu à travers toute la galaxie. C'est dans sa capitale, Trantor, que l'éminent savant Hari Seldon invente la psychohistoire, une science toute nouvelle, à base de psychologie et de mathématiques, qui lui permet de prédire l'avenir... C'est-à-dire l'effondrement de l'Empire d'ici cinq siècles et au-delà, trente mille années de chaos et de ténèbres. Pour empêcher cette catastrophe et sauver la civilisation, Seldon crée la <em>Fondation</em>.".to_string()));
|
||||
|
||||
assert_eq!(result, result_expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn highlight_longest_match() {
|
||||
let data = r#"{
|
||||
"title": "Ice"
|
||||
}"#;
|
||||
|
||||
let document: IndexMap<String, Value> = serde_json::from_str(data).unwrap();
|
||||
let mut attributes_to_highlight = HashSet::new();
|
||||
attributes_to_highlight.insert("title".to_string());
|
||||
|
||||
let mut matches = HashMap::new();
|
||||
|
||||
let mut m = Vec::new();
|
||||
m.push(MatchPosition {
|
||||
start: 0,
|
||||
length: 2,
|
||||
});
|
||||
m.push(MatchPosition {
|
||||
start: 0,
|
||||
length: 3,
|
||||
});
|
||||
matches.insert("title".to_string(), m);
|
||||
|
||||
let result = super::calculate_highlights(&document, &matches, &attributes_to_highlight);
|
||||
|
||||
let mut result_expected = IndexMap::new();
|
||||
result_expected.insert(
|
||||
"title".to_string(),
|
||||
Value::String("<em>Ice</em>".to_string()),
|
||||
);
|
||||
|
||||
assert_eq!(result, result_expected);
|
||||
}
|
||||
}
|
@ -1,26 +0,0 @@
|
||||
pub mod authentication;
|
||||
pub mod meilisearch;
|
||||
pub mod normalize_path;
|
||||
pub mod compression;
|
||||
|
||||
pub use authentication::Authentication;
|
||||
pub use normalize_path::NormalizePath;
|
||||
|
||||
/// Returns `true` when `c` falls in one of the CJK-related Unicode ranges
/// listed below.
///
/// The ranges are inclusive: the bounds are the first and last code points of
/// each listed range, so the final code point (for instance U+D7A3, the last
/// Hangul syllable, or U+9FFF) is part of it. The previous exclusive ranges
/// left every final code point out.
pub fn is_cjk(c: char) -> bool {
    match c {
        '\u{1100}'..='\u{11ff}' // Hangul Jamo
        | '\u{2e80}'..='\u{2eff}' // CJK Radicals Supplement
        | '\u{2f00}'..='\u{2fdf}' // Kangxi radical
        | '\u{3000}'..='\u{303f}' // Japanese-style punctuation
        | '\u{3040}'..='\u{309f}' // Japanese Hiragana
        | '\u{30a0}'..='\u{30ff}' // Japanese Katakana
        | '\u{3100}'..='\u{312f}' // Bopomofo
        | '\u{3130}'..='\u{318F}' // Hangul Compatibility Jamo
        | '\u{3200}'..='\u{32ff}' // Enclosed CJK Letters and Months
        | '\u{3400}'..='\u{4dbf}' // CJK Unified Ideographs Extension A
        | '\u{4e00}'..='\u{9fff}' // CJK Unified Ideographs
        | '\u{a960}'..='\u{a97f}' // Hangul Jamo Extended-A
        | '\u{ac00}'..='\u{d7a3}' // Hangul Syllables
        | '\u{d7b0}'..='\u{d7ff}' // Hangul Jamo Extended-B
        | '\u{f900}'..='\u{faff}' // CJK Compatibility Ideographs
        | '\u{ff00}'..='\u{ffef}' // Full-width roman characters and half-width katakana
        => true,
        _ => false,
    }
}
|
@ -1,86 +0,0 @@
|
||||
/// From https://docs.rs/actix-web/3.0.0-alpha.2/src/actix_web/middleware/normalize.rs.html#34
|
||||
use actix_http::Error;
|
||||
use actix_service::{Service, Transform};
|
||||
use actix_web::{
|
||||
dev::ServiceRequest,
|
||||
dev::ServiceResponse,
|
||||
http::uri::{PathAndQuery, Uri},
|
||||
};
|
||||
use futures::future::{ok, Ready};
|
||||
use regex::Regex;
|
||||
use std::task::{Context, Poll};
|
||||
pub struct NormalizePath;
|
||||
|
||||
impl<S, B> Transform<S> for NormalizePath
|
||||
where
|
||||
S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
|
||||
S::Future: 'static,
|
||||
{
|
||||
type Request = ServiceRequest;
|
||||
type Response = ServiceResponse<B>;
|
||||
type Error = Error;
|
||||
type InitError = ();
|
||||
type Transform = NormalizePathNormalization<S>;
|
||||
type Future = Ready<Result<Self::Transform, Self::InitError>>;
|
||||
|
||||
fn new_transform(&self, service: S) -> Self::Future {
|
||||
ok(NormalizePathNormalization {
|
||||
service,
|
||||
merge_slash: Regex::new("//+").unwrap(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub struct NormalizePathNormalization<S> {
|
||||
service: S,
|
||||
merge_slash: Regex,
|
||||
}
|
||||
|
||||
impl<S, B> Service for NormalizePathNormalization<S>
|
||||
where
|
||||
S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
|
||||
S::Future: 'static,
|
||||
{
|
||||
type Request = ServiceRequest;
|
||||
type Response = ServiceResponse<B>;
|
||||
type Error = Error;
|
||||
type Future = S::Future;
|
||||
|
||||
fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
|
||||
self.service.poll_ready(cx)
|
||||
}
|
||||
|
||||
fn call(&mut self, mut req: ServiceRequest) -> Self::Future {
|
||||
let head = req.head_mut();
|
||||
|
||||
// always add trailing slash, might be an extra one
|
||||
let path = head.uri.path().to_string() + "/";
|
||||
|
||||
if self.merge_slash.find(&path).is_some() {
|
||||
// normalize multiple /'s to one /
|
||||
let path = self.merge_slash.replace_all(&path, "/");
|
||||
|
||||
let path = if path.len() > 1 {
|
||||
path.trim_end_matches('/')
|
||||
} else {
|
||||
&path
|
||||
};
|
||||
|
||||
let mut parts = head.uri.clone().into_parts();
|
||||
let pq = parts.path_and_query.as_ref().unwrap();
|
||||
|
||||
let path = if let Some(q) = pq.query() {
|
||||
bytes::Bytes::from(format!("{}?{}", path, q))
|
||||
} else {
|
||||
bytes::Bytes::copy_from_slice(path.as_bytes())
|
||||
};
|
||||
parts.path_and_query = Some(PathAndQuery::from_maybe_shared(path).unwrap());
|
||||
|
||||
let uri = Uri::from_parts(parts).unwrap();
|
||||
req.match_info_mut().get_mut().update(&uri);
|
||||
req.head_mut().uri = uri;
|
||||
}
|
||||
|
||||
self.service.call(req)
|
||||
}
|
||||
}
|
@ -1,105 +0,0 @@
|
||||
#![allow(clippy::or_fun_call)]
|
||||
|
||||
pub mod data;
|
||||
pub mod error;
|
||||
pub mod helpers;
|
||||
pub mod models;
|
||||
pub mod option;
|
||||
pub mod routes;
|
||||
pub mod analytics;
|
||||
pub mod snapshot;
|
||||
pub mod dump;
|
||||
|
||||
use actix_http::Error;
|
||||
use actix_service::ServiceFactory;
|
||||
use actix_web::{dev, web, App};
|
||||
use chrono::Utc;
|
||||
use log::error;
|
||||
|
||||
use meilisearch_core::{Index, MainWriter, ProcessedUpdateResult};
|
||||
|
||||
pub use option::Opt;
|
||||
pub use self::data::Data;
|
||||
use self::error::{payload_error_handler, ResponseError};
|
||||
|
||||
pub fn create_app(
|
||||
data: &Data,
|
||||
enable_frontend: bool,
|
||||
) -> App<
|
||||
impl ServiceFactory<
|
||||
Config = (),
|
||||
Request = dev::ServiceRequest,
|
||||
Response = dev::ServiceResponse<actix_http::body::Body>,
|
||||
Error = Error,
|
||||
InitError = (),
|
||||
>,
|
||||
actix_http::body::Body,
|
||||
> {
|
||||
let app = App::new()
|
||||
.data(data.clone())
|
||||
.app_data(
|
||||
web::JsonConfig::default()
|
||||
.limit(data.http_payload_size_limit)
|
||||
.content_type(|_mime| true) // Accept all mime types
|
||||
.error_handler(|err, _req| payload_error_handler(err).into()),
|
||||
)
|
||||
.app_data(
|
||||
web::QueryConfig::default()
|
||||
.error_handler(|err, _req| payload_error_handler(err).into())
|
||||
)
|
||||
.configure(routes::document::services)
|
||||
.configure(routes::index::services)
|
||||
.configure(routes::search::services)
|
||||
.configure(routes::setting::services)
|
||||
.configure(routes::stop_words::services)
|
||||
.configure(routes::synonym::services)
|
||||
.configure(routes::health::services)
|
||||
.configure(routes::stats::services)
|
||||
.configure(routes::key::services)
|
||||
.configure(routes::dump::services);
|
||||
if enable_frontend {
|
||||
app
|
||||
.service(routes::load_html)
|
||||
.service(routes::load_css)
|
||||
} else {
|
||||
app
|
||||
.service(routes::running)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn index_update_callback_txn(index: Index, index_uid: &str, data: &Data, mut writer: &mut MainWriter) -> Result<(), String> {
|
||||
if let Err(e) = data.db.compute_stats(&mut writer, index_uid) {
|
||||
return Err(format!("Impossible to compute stats; {}", e));
|
||||
}
|
||||
|
||||
if let Err(e) = data.db.set_last_update(&mut writer, &Utc::now()) {
|
||||
return Err(format!("Impossible to update last_update; {}", e));
|
||||
}
|
||||
|
||||
if let Err(e) = index.main.put_updated_at(&mut writer) {
|
||||
return Err(format!("Impossible to update updated_at; {}", e));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn index_update_callback(index_uid: &str, data: &Data, status: ProcessedUpdateResult) {
|
||||
if status.error.is_some() {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(index) = data.db.open_index(index_uid) {
|
||||
let db = &data.db;
|
||||
let res = db.main_write::<_, _, ResponseError>(|mut writer| {
|
||||
if let Err(e) = index_update_callback_txn(index, index_uid, data, &mut writer) {
|
||||
error!("{}", e);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
});
|
||||
match res {
|
||||
Ok(_) => (),
|
||||
Err(e) => error!("{}", e),
|
||||
}
|
||||
}
|
||||
}
|
@ -1,171 +0,0 @@
|
||||
use std::{env, thread};
|
||||
|
||||
use actix_cors::Cors;
|
||||
use actix_web::{middleware, HttpServer};
|
||||
use main_error::MainError;
|
||||
use meilisearch_http::helpers::NormalizePath;
|
||||
use meilisearch_http::{create_app, index_update_callback, Data, Opt};
|
||||
use structopt::StructOpt;
|
||||
use meilisearch_http::{snapshot, dump};
|
||||
|
||||
mod analytics;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[global_allocator]
|
||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||
|
||||
#[actix_web::main]
|
||||
async fn main() -> Result<(), MainError> {
|
||||
let opt = Opt::from_args();
|
||||
|
||||
#[cfg(all(not(debug_assertions), feature = "sentry"))]
|
||||
let _sentry = sentry::init((
|
||||
if !opt.no_sentry {
|
||||
Some(opt.sentry_dsn.clone())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
sentry::ClientOptions {
|
||||
release: sentry::release_name!(),
|
||||
..Default::default()
|
||||
},
|
||||
));
|
||||
|
||||
match opt.env.as_ref() {
|
||||
"production" => {
|
||||
if opt.master_key.is_none() {
|
||||
return Err(
|
||||
"In production mode, the environment variable MEILI_MASTER_KEY is mandatory"
|
||||
.into(),
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(all(not(debug_assertions), feature = "sentry"))]
|
||||
if !opt.no_sentry && _sentry.is_enabled() {
|
||||
sentry::integrations::panic::register_panic_handler(); // TODO: This shouldn't be needed when upgrading to sentry 0.19.0. These integrations are turned on by default when using `sentry::init`.
|
||||
sentry::integrations::env_logger::init(None, Default::default());
|
||||
}
|
||||
}
|
||||
"development" => {
|
||||
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
if let Some(path) = &opt.import_snapshot {
|
||||
snapshot::load_snapshot(&opt.db_path, path, opt.ignore_snapshot_if_db_exists, opt.ignore_missing_snapshot)?;
|
||||
}
|
||||
|
||||
let data = Data::new(opt.clone())?;
|
||||
|
||||
if !opt.no_analytics {
|
||||
let analytics_data = data.clone();
|
||||
let analytics_opt = opt.clone();
|
||||
thread::spawn(move || analytics::analytics_sender(analytics_data, analytics_opt));
|
||||
}
|
||||
|
||||
let data_cloned = data.clone();
|
||||
data.db.set_update_callback(Box::new(move |name, status| {
|
||||
index_update_callback(name, &data_cloned, status);
|
||||
}));
|
||||
|
||||
|
||||
if let Some(path) = &opt.import_dump {
|
||||
dump::import_dump(&data, path, opt.dump_batch_size)?;
|
||||
}
|
||||
|
||||
if opt.schedule_snapshot {
|
||||
snapshot::schedule_snapshot(data.clone(), &opt.snapshot_dir, opt.snapshot_interval_sec.unwrap_or(86400))?;
|
||||
}
|
||||
|
||||
print_launch_resume(&opt, &data);
|
||||
|
||||
let enable_frontend = opt.env != "production";
|
||||
let http_server = HttpServer::new(move || {
|
||||
let cors = Cors::default()
|
||||
.send_wildcard()
|
||||
.allowed_headers(vec!["content-type", "x-meili-api-key"])
|
||||
.allow_any_origin()
|
||||
.allow_any_method()
|
||||
.max_age(86_400); // 24h
|
||||
|
||||
create_app(&data, enable_frontend)
|
||||
.wrap(cors)
|
||||
.wrap(middleware::Logger::default())
|
||||
.wrap(middleware::Compress::default())
|
||||
.wrap(NormalizePath)
|
||||
});
|
||||
|
||||
if let Some(config) = opt.get_ssl_config()? {
|
||||
http_server
|
||||
.bind_rustls(opt.http_addr, config)?
|
||||
.run()
|
||||
.await?;
|
||||
} else {
|
||||
http_server.bind(opt.http_addr)?.run().await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn print_launch_resume(opt: &Opt, data: &Data) {
|
||||
let ascii_name = r#"
|
||||
888b d888 d8b 888 d8b .d8888b. 888
|
||||
8888b d8888 Y8P 888 Y8P d88P Y88b 888
|
||||
88888b.d88888 888 Y88b. 888
|
||||
888Y88888P888 .d88b. 888 888 888 "Y888b. .d88b. 8888b. 888d888 .d8888b 88888b.
|
||||
888 Y888P 888 d8P Y8b 888 888 888 "Y88b. d8P Y8b "88b 888P" d88P" 888 "88b
|
||||
888 Y8P 888 88888888 888 888 888 "888 88888888 .d888888 888 888 888 888
|
||||
888 " 888 Y8b. 888 888 888 Y88b d88P Y8b. 888 888 888 Y88b. 888 888
|
||||
888 888 "Y8888 888 888 888 "Y8888P" "Y8888 "Y888888 888 "Y8888P 888 888
|
||||
"#;
|
||||
|
||||
eprintln!("{}", ascii_name);
|
||||
|
||||
eprintln!("Database path:\t\t{:?}", opt.db_path);
|
||||
eprintln!("Server listening on:\t\"http://{}\"", opt.http_addr);
|
||||
eprintln!("Environment:\t\t{:?}", opt.env);
|
||||
eprintln!("Commit SHA:\t\t{:?}", env!("VERGEN_SHA").to_string());
|
||||
eprintln!(
|
||||
"Build date:\t\t{:?}",
|
||||
env!("VERGEN_BUILD_TIMESTAMP").to_string()
|
||||
);
|
||||
eprintln!(
|
||||
"Package version:\t{:?}",
|
||||
env!("CARGO_PKG_VERSION").to_string()
|
||||
);
|
||||
|
||||
#[cfg(all(not(debug_assertions), feature = "sentry"))]
|
||||
eprintln!(
|
||||
"Sentry DSN:\t\t{:?}",
|
||||
if !opt.no_sentry {
|
||||
&opt.sentry_dsn
|
||||
} else {
|
||||
"Disabled"
|
||||
}
|
||||
);
|
||||
|
||||
eprintln!(
|
||||
"Anonymous telemetry:\t{:?}",
|
||||
if !opt.no_analytics {
|
||||
"Enabled"
|
||||
} else {
|
||||
"Disabled"
|
||||
}
|
||||
);
|
||||
|
||||
eprintln!();
|
||||
|
||||
if data.api_keys.master.is_some() {
|
||||
eprintln!("A Master Key has been set. Requests to MeiliSearch won't be authorized unless you provide an authentication key.");
|
||||
} else {
|
||||
eprintln!("No master key found; The server will accept unidentified requests. \
|
||||
If you need some protection in development mode, please export a key: export MEILI_MASTER_KEY=xxx");
|
||||
}
|
||||
|
||||
eprintln!();
|
||||
eprintln!("Documentation:\t\thttps://docs.meilisearch.com");
|
||||
eprintln!("Source code:\t\thttps://github.com/meilisearch/meilisearch");
|
||||
eprintln!("Contact:\t\thttps://docs.meilisearch.com/learn/what_is_meilisearch/contact.html or bonjour@meilisearch.com");
|
||||
eprintln!();
|
||||
}
|
@ -1 +0,0 @@
|
||||
pub mod update_operation;
|
@ -1,33 +0,0 @@
|
||||
use std::fmt;
|
||||
|
||||
/// Kinds of update operation, displayable as their variant name.
#[allow(dead_code)]
#[derive(Debug)]
pub enum UpdateOperation {
    ClearAllDocuments,
    DocumentsAddition,
    DocumentsDeletion,
    SynonymsUpdate,
    SynonymsDeletion,
    StopWordsAddition,
    StopWordsDeletion,
    Schema,
    Config,
}

impl fmt::Display for UpdateOperation {
    /// Writes the variant name exactly as it is spelled in the enum.
    fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result {
        use UpdateOperation::*;

        let name = match self {
            ClearAllDocuments => "ClearAllDocuments",
            DocumentsAddition => "DocumentsAddition",
            DocumentsDeletion => "DocumentsDeletion",
            SynonymsUpdate => "SynonymsUpdate",
            // Was misspelled "SynonymsDelettion".
            SynonymsDeletion => "SynonymsDeletion",
            StopWordsAddition => "StopWordsAddition",
            StopWordsDeletion => "StopWordsDeletion",
            Schema => "Schema",
            Config => "Config",
        };

        f.write_str(name)
    }
}
|
@ -1,221 +0,0 @@
|
||||
use std::{error, fs};
|
||||
use std::io::{BufReader, Read};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
|
||||
use rustls::{
|
||||
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth,
|
||||
RootCertStore,
|
||||
};
|
||||
use structopt::StructOpt;
|
||||
|
||||
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
|
||||
|
||||
#[derive(Debug, Default, Clone, StructOpt)]
|
||||
pub struct Opt {
|
||||
/// The destination where the database must be created.
|
||||
#[structopt(long, env = "MEILI_DB_PATH", default_value = "./data.ms")]
|
||||
pub db_path: String,
|
||||
|
||||
/// The address on which the http server will listen.
|
||||
#[structopt(long, env = "MEILI_HTTP_ADDR", default_value = "127.0.0.1:7700")]
|
||||
pub http_addr: String,
|
||||
|
||||
/// The master key allowing you to do everything on the server.
|
||||
#[structopt(long, env = "MEILI_MASTER_KEY")]
|
||||
pub master_key: Option<String>,
|
||||
|
||||
/// The Sentry DSN to use for error reporting. This defaults to the MeiliSearch Sentry project.
|
||||
/// You can disable sentry all together using the `--no-sentry` flag or `MEILI_NO_SENTRY` environment variable.
|
||||
#[cfg(all(not(debug_assertions), feature = "sentry"))]
|
||||
#[structopt(long, env = "SENTRY_DSN", default_value = "https://5ddfa22b95f241198be2271aaf028653@sentry.io/3060337")]
|
||||
pub sentry_dsn: String,
|
||||
|
||||
/// Disable Sentry error reporting.
|
||||
#[cfg(all(not(debug_assertions), feature = "sentry"))]
|
||||
#[structopt(long, env = "MEILI_NO_SENTRY")]
|
||||
pub no_sentry: bool,
|
||||
|
||||
/// This environment variable must be set to `production` if you are running in production.
|
||||
/// If the server is running in development mode more logs will be displayed,
|
||||
/// and the master key can be avoided which implies that there is no security on the updates routes.
|
||||
/// This is useful to debug when integrating the engine with another service.
|
||||
#[structopt(long, env = "MEILI_ENV", default_value = "development", possible_values = &POSSIBLE_ENV)]
|
||||
pub env: String,
|
||||
|
||||
/// Do not send analytics to Meili.
|
||||
#[structopt(long, env = "MEILI_NO_ANALYTICS")]
|
||||
pub no_analytics: bool,
|
||||
|
||||
/// The maximum size, in bytes, of the main lmdb database directory
|
||||
#[structopt(long, env = "MEILI_MAX_MDB_SIZE", default_value = "107374182400")] // 100GB
|
||||
pub max_mdb_size: usize,
|
||||
|
||||
/// The maximum size, in bytes, of the update lmdb database directory
|
||||
#[structopt(long, env = "MEILI_MAX_UDB_SIZE", default_value = "107374182400")] // 100GB
|
||||
pub max_udb_size: usize,
|
||||
|
||||
/// The maximum size, in bytes, of accepted JSON payloads
|
||||
#[structopt(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "104857600")] // 100MB
|
||||
pub http_payload_size_limit: usize,
|
||||
|
||||
/// Read server certificates from CERTFILE.
|
||||
/// This should contain PEM-format certificates
|
||||
/// in the right order (the first certificate should
|
||||
/// certify KEYFILE, the last should be a root CA).
|
||||
#[structopt(long, env = "MEILI_SSL_CERT_PATH", parse(from_os_str))]
|
||||
pub ssl_cert_path: Option<PathBuf>,
|
||||
|
||||
/// Read private key from KEYFILE. This should be a RSA
|
||||
/// private key or PKCS8-encoded private key, in PEM format.
|
||||
#[structopt(long, env = "MEILI_SSL_KEY_PATH", parse(from_os_str))]
|
||||
pub ssl_key_path: Option<PathBuf>,
|
||||
|
||||
/// Enable client authentication, and accept certificates
|
||||
/// signed by those roots provided in CERTFILE.
|
||||
#[structopt(long, env = "MEILI_SSL_AUTH_PATH", parse(from_os_str))]
|
||||
pub ssl_auth_path: Option<PathBuf>,
|
||||
|
||||
/// Read DER-encoded OCSP response from OCSPFILE and staple to certificate.
|
||||
/// Optional
|
||||
#[structopt(long, env = "MEILI_SSL_OCSP_PATH", parse(from_os_str))]
|
||||
pub ssl_ocsp_path: Option<PathBuf>,
|
||||
|
||||
/// Send a fatal alert if the client does not complete client authentication.
|
||||
#[structopt(long, env = "MEILI_SSL_REQUIRE_AUTH")]
|
||||
pub ssl_require_auth: bool,
|
||||
|
||||
/// SSL support session resumption
|
||||
#[structopt(long, env = "MEILI_SSL_RESUMPTION")]
|
||||
pub ssl_resumption: bool,
|
||||
|
||||
/// SSL support tickets.
|
||||
#[structopt(long, env = "MEILI_SSL_TICKETS")]
|
||||
pub ssl_tickets: bool,
|
||||
|
||||
/// Defines the path of the snapshot file to import.
|
||||
/// This option will, by default, stop the process if a database already exist or if no snapshot exists at
|
||||
/// the given path. If this option is not specified no snapshot is imported.
|
||||
#[structopt(long)]
|
||||
pub import_snapshot: Option<PathBuf>,
|
||||
|
||||
/// The engine will ignore a missing snapshot and not return an error in such case.
|
||||
#[structopt(long, requires = "import-snapshot")]
|
||||
pub ignore_missing_snapshot: bool,
|
||||
|
||||
/// The engine will skip snapshot importation and not return an error in such case.
|
||||
#[structopt(long, requires = "import-snapshot")]
|
||||
pub ignore_snapshot_if_db_exists: bool,
|
||||
|
||||
/// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap.
|
||||
#[structopt(long, env = "MEILI_SNAPSHOT_DIR", default_value = "snapshots/")]
|
||||
pub snapshot_dir: PathBuf,
|
||||
|
||||
/// Activate snapshot scheduling.
|
||||
#[structopt(long, env = "MEILI_SCHEDULE_SNAPSHOT")]
|
||||
pub schedule_snapshot: bool,
|
||||
|
||||
/// Defines time interval, in seconds, between each snapshot creation.
|
||||
#[structopt(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC")]
|
||||
pub snapshot_interval_sec: Option<u64>,
|
||||
|
||||
/// Folder where dumps are created when the dump route is called.
|
||||
#[structopt(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")]
|
||||
pub dumps_dir: PathBuf,
|
||||
|
||||
/// Import a dump from the specified path, must be a `.tar.gz` file.
|
||||
#[structopt(long, conflicts_with = "import-snapshot")]
|
||||
pub import_dump: Option<PathBuf>,
|
||||
|
||||
/// The batch size used in the importation process, the bigger it is the faster the dump is created.
|
||||
#[structopt(long, env = "MEILI_DUMP_BATCH_SIZE", default_value = "1024")]
|
||||
pub dump_batch_size: usize,
|
||||
}
|
||||
|
||||
impl Opt {
|
||||
pub fn get_ssl_config(&self) -> Result<Option<rustls::ServerConfig>, Box<dyn error::Error>> {
|
||||
if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) {
|
||||
let client_auth = match &self.ssl_auth_path {
|
||||
Some(auth_path) => {
|
||||
let roots = load_certs(auth_path.to_path_buf())?;
|
||||
let mut client_auth_roots = RootCertStore::empty();
|
||||
for root in roots {
|
||||
client_auth_roots.add(&root).unwrap();
|
||||
}
|
||||
if self.ssl_require_auth {
|
||||
AllowAnyAuthenticatedClient::new(client_auth_roots)
|
||||
} else {
|
||||
AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots)
|
||||
}
|
||||
}
|
||||
None => NoClientAuth::new(),
|
||||
};
|
||||
|
||||
let mut config = rustls::ServerConfig::new(client_auth);
|
||||
config.key_log = Arc::new(rustls::KeyLogFile::new());
|
||||
|
||||
let certs = load_certs(cert_path.to_path_buf())?;
|
||||
let privkey = load_private_key(key_path.to_path_buf())?;
|
||||
let ocsp = load_ocsp(&self.ssl_ocsp_path)?;
|
||||
config
|
||||
.set_single_cert_with_ocsp_and_sct(certs, privkey, ocsp, vec![])
|
||||
.map_err(|_| "bad certificates/private key")?;
|
||||
|
||||
if self.ssl_resumption {
|
||||
config.set_persistence(rustls::ServerSessionMemoryCache::new(256));
|
||||
}
|
||||
|
||||
if self.ssl_tickets {
|
||||
config.ticketer = rustls::Ticketer::new();
|
||||
}
|
||||
|
||||
Ok(Some(config))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn load_certs(filename: PathBuf) -> Result<Vec<rustls::Certificate>, Box<dyn error::Error>> {
|
||||
let certfile = fs::File::open(filename).map_err(|_| "cannot open certificate file")?;
|
||||
let mut reader = BufReader::new(certfile);
|
||||
Ok(certs(&mut reader).map_err(|_| "cannot read certificate file")?)
|
||||
}
|
||||
|
||||
fn load_private_key(filename: PathBuf) -> Result<rustls::PrivateKey, Box<dyn error::Error>> {
|
||||
let rsa_keys = {
|
||||
let keyfile =
|
||||
fs::File::open(filename.clone()).map_err(|_| "cannot open private key file")?;
|
||||
let mut reader = BufReader::new(keyfile);
|
||||
rsa_private_keys(&mut reader).map_err(|_| "file contains invalid rsa private key")?
|
||||
};
|
||||
|
||||
let pkcs8_keys = {
|
||||
let keyfile = fs::File::open(filename).map_err(|_| "cannot open private key file")?;
|
||||
let mut reader = BufReader::new(keyfile);
|
||||
pkcs8_private_keys(&mut reader)
|
||||
.map_err(|_| "file contains invalid pkcs8 private key (encrypted keys not supported)")?
|
||||
};
|
||||
|
||||
// prefer to load pkcs8 keys
|
||||
if !pkcs8_keys.is_empty() {
|
||||
Ok(pkcs8_keys[0].clone())
|
||||
} else {
|
||||
assert!(!rsa_keys.is_empty());
|
||||
Ok(rsa_keys[0].clone())
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads the raw OCSP response stored at `filename`, if any.
///
/// Returns an empty vector when `filename` is `None`; otherwise returns the
/// full content of the file.
///
/// # Errors
///
/// Fails with "cannot open ocsp file" when the file cannot be opened and
/// with "cannot read ocsp file" when reading it fails.
fn load_ocsp(filename: &Option<PathBuf>) -> Result<Vec<u8>, Box<dyn error::Error>> {
    let mut ret = Vec::new();

    if let Some(name) = filename {
        fs::File::open(name)
            .map_err(|_| "cannot open ocsp file")?
            .read_to_end(&mut ret)
            .map_err(|_| "cannot read ocsp file")?;
    }

    Ok(ret)
}
|
@ -1,266 +0,0 @@
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
|
||||
use actix_web::{delete, get, post, put};
|
||||
use actix_web::{web, HttpResponse};
|
||||
use indexmap::IndexMap;
|
||||
use meilisearch_core::{update, MainReader};
|
||||
use serde_json::Value;
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::Data;
|
||||
use crate::error::{Error, ResponseError};
|
||||
use crate::helpers::Authentication;
|
||||
use crate::routes::{IndexParam, IndexUpdateResponse};
|
||||
|
||||
/// A document as exchanged over the HTTP API: field names mapped to JSON values.
type Document = IndexMap<String, Value>;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct DocumentParam {
|
||||
index_uid: String,
|
||||
document_id: String,
|
||||
}
|
||||
|
||||
pub fn services(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(get_document)
|
||||
.service(delete_document)
|
||||
.service(get_all_documents)
|
||||
.service(add_documents)
|
||||
.service(update_documents)
|
||||
.service(delete_documents)
|
||||
.service(clear_all_documents);
|
||||
}
|
||||
|
||||
#[get(
|
||||
"/indexes/{index_uid}/documents/{document_id}",
|
||||
wrap = "Authentication::Public"
|
||||
)]
|
||||
async fn get_document(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<DocumentParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let reader = data.db.main_read_txn()?;
|
||||
|
||||
let internal_id = index
|
||||
.main
|
||||
.external_to_internal_docid(&reader, &path.document_id)?
|
||||
.ok_or(Error::document_not_found(&path.document_id))?;
|
||||
|
||||
let document: Document = index
|
||||
.document(&reader, None, internal_id)?
|
||||
.ok_or(Error::document_not_found(&path.document_id))?;
|
||||
|
||||
Ok(HttpResponse::Ok().json(document))
|
||||
}
|
||||
|
||||
#[delete(
|
||||
"/indexes/{index_uid}/documents/{document_id}",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn delete_document(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<DocumentParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let mut documents_deletion = index.documents_deletion();
|
||||
documents_deletion.delete_document_by_external_docid(path.document_id.clone());
|
||||
|
||||
let update_id = data.db.update_write(|w| documents_deletion.finalize(w))?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
struct BrowseQuery {
|
||||
offset: Option<usize>,
|
||||
limit: Option<usize>,
|
||||
attributes_to_retrieve: Option<String>,
|
||||
}
|
||||
|
||||
pub fn get_all_documents_sync(
|
||||
data: &web::Data<Data>,
|
||||
reader: &MainReader,
|
||||
index_uid: &str,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<&String>
|
||||
) -> Result<Vec<Document>, Error> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(index_uid)
|
||||
.ok_or(Error::index_not_found(index_uid))?;
|
||||
|
||||
|
||||
let documents_ids: Result<BTreeSet<_>, _> = index
|
||||
.documents_fields_counts
|
||||
.documents_ids(reader)?
|
||||
.skip(offset)
|
||||
.take(limit)
|
||||
.collect();
|
||||
|
||||
let attributes: Option<HashSet<&str>> = attributes_to_retrieve
|
||||
.map(|a| a.split(',').collect());
|
||||
|
||||
let mut documents = Vec::new();
|
||||
for document_id in documents_ids? {
|
||||
if let Ok(Some(document)) =
|
||||
index.document::<Document>(reader, attributes.as_ref(), document_id)
|
||||
{
|
||||
documents.push(document);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(documents)
|
||||
}
|
||||
|
||||
#[get("/indexes/{index_uid}/documents", wrap = "Authentication::Public")]
|
||||
async fn get_all_documents(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
params: web::Query<BrowseQuery>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let offset = params.offset.unwrap_or(0);
|
||||
let limit = params.limit.unwrap_or(20);
|
||||
let index_uid = &path.index_uid;
|
||||
let reader = data.db.main_read_txn()?;
|
||||
|
||||
let documents = get_all_documents_sync(
|
||||
&data,
|
||||
&reader,
|
||||
index_uid,
|
||||
offset,
|
||||
limit,
|
||||
params.attributes_to_retrieve.as_ref()
|
||||
)?;
|
||||
|
||||
Ok(HttpResponse::Ok().json(documents))
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
struct UpdateDocumentsQuery {
|
||||
primary_key: Option<String>,
|
||||
}
|
||||
|
||||
async fn update_multiple_documents(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
params: web::Query<UpdateDocumentsQuery>,
|
||||
body: web::Json<Vec<Document>>,
|
||||
is_partial: bool,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let update_id = data.get_or_create_index(&path.index_uid, |index| {
|
||||
|
||||
let mut document_addition = if is_partial {
|
||||
index.documents_partial_addition()
|
||||
} else {
|
||||
index.documents_addition()
|
||||
};
|
||||
|
||||
// Return an early error if primary key is already set, otherwise, try to set it up in the
|
||||
// update later.
|
||||
let reader = data.db.main_read_txn()?;
|
||||
let schema = index
|
||||
.main
|
||||
.schema(&reader)?
|
||||
.ok_or(meilisearch_core::Error::SchemaMissing)?;
|
||||
|
||||
match (params.into_inner().primary_key, schema.primary_key()) {
|
||||
(Some(key), None) => document_addition.set_primary_key(key),
|
||||
(None, None) => {
|
||||
let key = body
|
||||
.first()
|
||||
.and_then(find_primary_key)
|
||||
.ok_or(meilisearch_core::Error::MissingPrimaryKey)?;
|
||||
document_addition.set_primary_key(key);
|
||||
}
|
||||
_ => ()
|
||||
}
|
||||
|
||||
for document in body.into_inner() {
|
||||
document_addition.update_document(document);
|
||||
}
|
||||
|
||||
Ok(data.db.update_write(|w| document_addition.finalize(w))?)
|
||||
})?;
|
||||
return Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)));
|
||||
}
|
||||
|
||||
/// Guesses the primary key of a document: the first field, in document
/// order, whose lowercased name contains `"id"`. Returns `None` when no
/// field qualifies.
fn find_primary_key(document: &IndexMap<String, Value>) -> Option<String> {
    document
        .keys()
        .find(|name| name.to_lowercase().contains("id"))
        .map(|name| name.to_string())
}
|
||||
|
||||
#[post("/indexes/{index_uid}/documents", wrap = "Authentication::Private")]
|
||||
async fn add_documents(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
params: web::Query<UpdateDocumentsQuery>,
|
||||
body: web::Json<Vec<Document>>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
update_multiple_documents(data, path, params, body, false).await
|
||||
}
|
||||
|
||||
#[put("/indexes/{index_uid}/documents", wrap = "Authentication::Private")]
|
||||
async fn update_documents(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
params: web::Query<UpdateDocumentsQuery>,
|
||||
body: web::Json<Vec<Document>>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
update_multiple_documents(data, path, params, body, true).await
|
||||
}
|
||||
|
||||
#[post(
|
||||
"/indexes/{index_uid}/documents/delete-batch",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn delete_documents(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
body: web::Json<Vec<Value>>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let mut documents_deletion = index.documents_deletion();
|
||||
|
||||
for document_id in body.into_inner() {
|
||||
let document_id = update::value_to_string(&document_id);
|
||||
documents_deletion.delete_document_by_external_docid(document_id);
|
||||
}
|
||||
|
||||
let update_id = data.db.update_write(|w| documents_deletion.finalize(w))?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
||||
|
||||
#[delete("/indexes/{index_uid}/documents", wrap = "Authentication::Private")]
|
||||
async fn clear_all_documents(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let update_id = data.db.update_write(|w| index.clear_all(w))?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
@ -1,64 +0,0 @@
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
|
||||
use actix_web::{get, post};
|
||||
use actix_web::{HttpResponse, web};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::dump::{DumpInfo, DumpStatus, compressed_dumps_dir, init_dump_process};
|
||||
use crate::Data;
|
||||
use crate::error::{Error, ResponseError};
|
||||
use crate::helpers::Authentication;
|
||||
|
||||
pub fn services(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(trigger_dump)
|
||||
.service(get_dump_status);
|
||||
}
|
||||
|
||||
#[post("/dumps", wrap = "Authentication::Private")]
|
||||
async fn trigger_dump(
|
||||
data: web::Data<Data>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let dumps_dir = Path::new(&data.dumps_dir);
|
||||
match init_dump_process(&data, &dumps_dir) {
|
||||
Ok(resume) => Ok(HttpResponse::Accepted().json(resume)),
|
||||
Err(e) => Err(e.into())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct DumpStatusResponse {
|
||||
status: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct DumpParam {
|
||||
dump_uid: String,
|
||||
}
|
||||
|
||||
#[get("/dumps/{dump_uid}/status", wrap = "Authentication::Private")]
|
||||
async fn get_dump_status(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<DumpParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let dumps_dir = Path::new(&data.dumps_dir);
|
||||
let dump_uid = &path.dump_uid;
|
||||
|
||||
if let Some(resume) = data.get_current_dump_info() {
|
||||
if &resume.uid == dump_uid {
|
||||
return Ok(HttpResponse::Ok().json(resume));
|
||||
}
|
||||
}
|
||||
|
||||
if File::open(compressed_dumps_dir(Path::new(dumps_dir), dump_uid)).is_ok() {
|
||||
let resume = DumpInfo::new(
|
||||
dump_uid.into(),
|
||||
DumpStatus::Done
|
||||
);
|
||||
|
||||
Ok(HttpResponse::Ok().json(resume))
|
||||
} else {
|
||||
Err(Error::not_found("dump does not exist").into())
|
||||
}
|
||||
}
|
@ -1,14 +0,0 @@
|
||||
use actix_web::get;
|
||||
use actix_web::{web, HttpResponse};
|
||||
|
||||
use crate::error::ResponseError;
|
||||
|
||||
pub fn services(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(get_health);
|
||||
}
|
||||
|
||||
#[get("/health")]
|
||||
async fn get_health() -> Result<HttpResponse, ResponseError> {
|
||||
let payload = serde_json::json!({ "status": "available" });
|
||||
Ok(HttpResponse::Ok().json(payload))
|
||||
}
|
@ -1,388 +0,0 @@
|
||||
use actix_web::{delete, get, post, put};
|
||||
use actix_web::{web, HttpResponse};
|
||||
use chrono::{DateTime, Utc};
|
||||
use log::error;
|
||||
use meilisearch_core::{Database, MainReader, UpdateReader};
|
||||
use meilisearch_core::update::UpdateStatus;
|
||||
use rand::seq::SliceRandom;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::Data;
|
||||
use crate::error::{Error, ResponseError};
|
||||
use crate::helpers::Authentication;
|
||||
use crate::routes::IndexParam;
|
||||
|
||||
pub fn services(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(list_indexes)
|
||||
.service(get_index)
|
||||
.service(create_index)
|
||||
.service(update_index)
|
||||
.service(delete_index)
|
||||
.service(get_update_status)
|
||||
.service(get_all_updates_status);
|
||||
}
|
||||
|
||||
fn generate_uid() -> String {
|
||||
let mut rng = rand::thread_rng();
|
||||
let sample = b"abcdefghijklmnopqrstuvwxyz0123456789";
|
||||
sample
|
||||
.choose_multiple(&mut rng, 8)
|
||||
.map(|c| *c as char)
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexResponse {
|
||||
pub name: String,
|
||||
pub uid: String,
|
||||
created_at: DateTime<Utc>,
|
||||
updated_at: DateTime<Utc>,
|
||||
pub primary_key: Option<String>,
|
||||
}
|
||||
|
||||
pub fn list_indexes_sync(data: &web::Data<Data>, reader: &MainReader) -> Result<Vec<IndexResponse>, ResponseError> {
|
||||
let mut indexes = Vec::new();
|
||||
|
||||
for index_uid in data.db.indexes_uids() {
|
||||
let index = data.db.open_index(&index_uid);
|
||||
|
||||
match index {
|
||||
Some(index) => {
|
||||
let name = index.main.name(reader)?.ok_or(Error::internal(
|
||||
"Impossible to get the name of an index",
|
||||
))?;
|
||||
let created_at = index
|
||||
.main
|
||||
.created_at(reader)?
|
||||
.ok_or(Error::internal(
|
||||
"Impossible to get the create date of an index",
|
||||
))?;
|
||||
let updated_at = index
|
||||
.main
|
||||
.updated_at(reader)?
|
||||
.ok_or(Error::internal(
|
||||
"Impossible to get the last update date of an index",
|
||||
))?;
|
||||
|
||||
let primary_key = match index.main.schema(reader) {
|
||||
Ok(Some(schema)) => match schema.primary_key() {
|
||||
Some(primary_key) => Some(primary_key.to_owned()),
|
||||
None => None,
|
||||
},
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let index_response = IndexResponse {
|
||||
name,
|
||||
uid: index_uid,
|
||||
created_at,
|
||||
updated_at,
|
||||
primary_key,
|
||||
};
|
||||
indexes.push(index_response);
|
||||
}
|
||||
None => error!(
|
||||
"Index {} is referenced in the indexes list but cannot be found",
|
||||
index_uid
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(indexes)
|
||||
}
|
||||
|
||||
#[get("/indexes", wrap = "Authentication::Private")]
|
||||
async fn list_indexes(data: web::Data<Data>) -> Result<HttpResponse, ResponseError> {
|
||||
let reader = data.db.main_read_txn()?;
|
||||
let indexes = list_indexes_sync(&data, &reader)?;
|
||||
|
||||
Ok(HttpResponse::Ok().json(indexes))
|
||||
}
|
||||
|
||||
#[get("/indexes/{index_uid}", wrap = "Authentication::Private")]
|
||||
async fn get_index(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let reader = data.db.main_read_txn()?;
|
||||
let name = index.main.name(&reader)?.ok_or(Error::internal(
|
||||
"Impossible to get the name of an index",
|
||||
))?;
|
||||
let created_at = index
|
||||
.main
|
||||
.created_at(&reader)?
|
||||
.ok_or(Error::internal(
|
||||
"Impossible to get the create date of an index",
|
||||
))?;
|
||||
let updated_at = index
|
||||
.main
|
||||
.updated_at(&reader)?
|
||||
.ok_or(Error::internal(
|
||||
"Impossible to get the last update date of an index",
|
||||
))?;
|
||||
|
||||
let primary_key = match index.main.schema(&reader) {
|
||||
Ok(Some(schema)) => match schema.primary_key() {
|
||||
Some(primary_key) => Some(primary_key.to_owned()),
|
||||
None => None,
|
||||
},
|
||||
_ => None,
|
||||
};
|
||||
let index_response = IndexResponse {
|
||||
name,
|
||||
uid: path.index_uid.clone(),
|
||||
created_at,
|
||||
updated_at,
|
||||
primary_key,
|
||||
};
|
||||
|
||||
Ok(HttpResponse::Ok().json(index_response))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
struct IndexCreateRequest {
|
||||
name: Option<String>,
|
||||
uid: Option<String>,
|
||||
primary_key: Option<String>,
|
||||
}
|
||||
|
||||
|
||||
pub fn create_index_sync(
|
||||
database: &std::sync::Arc<Database>,
|
||||
uid: String,
|
||||
name: String,
|
||||
primary_key: Option<String>,
|
||||
) -> Result<IndexResponse, Error> {
|
||||
|
||||
let created_index = database
|
||||
.create_index(&uid)
|
||||
.map_err(|e| match e {
|
||||
meilisearch_core::Error::IndexAlreadyExists => Error::IndexAlreadyExists(uid.clone()),
|
||||
_ => Error::create_index(e)
|
||||
})?;
|
||||
|
||||
let index_response = database.main_write::<_, _, Error>(|mut write_txn| {
|
||||
created_index.main.put_name(&mut write_txn, &name)?;
|
||||
|
||||
let created_at = created_index
|
||||
.main
|
||||
.created_at(&write_txn)?
|
||||
.ok_or(Error::internal("Impossible to read created at"))?;
|
||||
|
||||
let updated_at = created_index
|
||||
.main
|
||||
.updated_at(&write_txn)?
|
||||
.ok_or(Error::internal("Impossible to read updated at"))?;
|
||||
|
||||
if let Some(id) = primary_key.clone() {
|
||||
if let Some(mut schema) = created_index.main.schema(&write_txn)? {
|
||||
schema
|
||||
.set_primary_key(&id)
|
||||
.map_err(Error::bad_request)?;
|
||||
created_index.main.put_schema(&mut write_txn, &schema)?;
|
||||
}
|
||||
}
|
||||
let index_response = IndexResponse {
|
||||
name,
|
||||
uid,
|
||||
created_at,
|
||||
updated_at,
|
||||
primary_key,
|
||||
};
|
||||
Ok(index_response)
|
||||
})?;
|
||||
|
||||
Ok(index_response)
|
||||
}
|
||||
|
||||
#[post("/indexes", wrap = "Authentication::Private")]
|
||||
async fn create_index(
|
||||
data: web::Data<Data>,
|
||||
body: web::Json<IndexCreateRequest>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
if let (None, None) = (body.name.clone(), body.uid.clone()) {
|
||||
return Err(Error::bad_request(
|
||||
"Index creation must have an uid",
|
||||
).into());
|
||||
}
|
||||
|
||||
let uid = match &body.uid {
|
||||
Some(uid) => {
|
||||
if uid
|
||||
.chars()
|
||||
.all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
|
||||
{
|
||||
uid.to_owned()
|
||||
} else {
|
||||
return Err(Error::InvalidIndexUid.into());
|
||||
}
|
||||
}
|
||||
None => loop {
|
||||
let uid = generate_uid();
|
||||
if data.db.open_index(&uid).is_none() {
|
||||
break uid;
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
let name = body.name.as_ref().unwrap_or(&uid).to_string();
|
||||
|
||||
let index_response = create_index_sync(&data.db, uid, name, body.primary_key.clone())?;
|
||||
|
||||
Ok(HttpResponse::Created().json(index_response))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
struct UpdateIndexRequest {
|
||||
name: Option<String>,
|
||||
primary_key: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct UpdateIndexResponse {
|
||||
name: String,
|
||||
uid: String,
|
||||
created_at: DateTime<Utc>,
|
||||
updated_at: DateTime<Utc>,
|
||||
primary_key: Option<String>,
|
||||
}
|
||||
|
||||
#[put("/indexes/{index_uid}", wrap = "Authentication::Private")]
|
||||
async fn update_index(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
body: web::Json<IndexCreateRequest>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
data.db.main_write::<_, _, ResponseError>(|writer| {
|
||||
if let Some(name) = &body.name {
|
||||
index.main.put_name(writer, name)?;
|
||||
}
|
||||
|
||||
if let Some(id) = body.primary_key.clone() {
|
||||
if let Some(mut schema) = index.main.schema(writer)? {
|
||||
schema.set_primary_key(&id)?;
|
||||
index.main.put_schema(writer, &schema)?;
|
||||
}
|
||||
}
|
||||
index.main.put_updated_at(writer)?;
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
let reader = data.db.main_read_txn()?;
|
||||
let name = index.main.name(&reader)?.ok_or(Error::internal(
|
||||
"Impossible to get the name of an index",
|
||||
))?;
|
||||
let created_at = index
|
||||
.main
|
||||
.created_at(&reader)?
|
||||
.ok_or(Error::internal(
|
||||
"Impossible to get the create date of an index",
|
||||
))?;
|
||||
let updated_at = index
|
||||
.main
|
||||
.updated_at(&reader)?
|
||||
.ok_or(Error::internal(
|
||||
"Impossible to get the last update date of an index",
|
||||
))?;
|
||||
|
||||
let primary_key = match index.main.schema(&reader) {
|
||||
Ok(Some(schema)) => match schema.primary_key() {
|
||||
Some(primary_key) => Some(primary_key.to_owned()),
|
||||
None => None,
|
||||
},
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let index_response = IndexResponse {
|
||||
name,
|
||||
uid: path.index_uid.clone(),
|
||||
created_at,
|
||||
updated_at,
|
||||
primary_key,
|
||||
};
|
||||
|
||||
Ok(HttpResponse::Ok().json(index_response))
|
||||
}
|
||||
|
||||
#[delete("/indexes/{index_uid}", wrap = "Authentication::Private")]
|
||||
async fn delete_index(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
if data.db.delete_index(&path.index_uid)? {
|
||||
Ok(HttpResponse::NoContent().finish())
|
||||
} else {
|
||||
Err(Error::index_not_found(&path.index_uid).into())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct UpdateParam {
|
||||
index_uid: String,
|
||||
update_id: u64,
|
||||
}
|
||||
|
||||
#[get(
|
||||
"/indexes/{index_uid}/updates/{update_id}",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn get_update_status(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<UpdateParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let reader = data.db.update_read_txn()?;
|
||||
|
||||
let status = index.update_status(&reader, path.update_id)?;
|
||||
|
||||
match status {
|
||||
Some(status) => Ok(HttpResponse::Ok().json(status)),
|
||||
None => Err(Error::NotFound(format!(
|
||||
"Update {}",
|
||||
path.update_id
|
||||
)).into()),
|
||||
}
|
||||
}
|
||||
pub fn get_all_updates_status_sync(
|
||||
data: &web::Data<Data>,
|
||||
reader: &UpdateReader,
|
||||
index_uid: &str,
|
||||
) -> Result<Vec<UpdateStatus>, Error> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(index_uid)
|
||||
.ok_or(Error::index_not_found(index_uid))?;
|
||||
|
||||
Ok(index.all_updates_status(reader)?)
|
||||
}
|
||||
|
||||
#[get("/indexes/{index_uid}/updates", wrap = "Authentication::Private")]
|
||||
async fn get_all_updates_status(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
|
||||
let reader = data.db.update_read_txn()?;
|
||||
|
||||
let response = get_all_updates_status_sync(&data, &reader, &path.index_uid)?;
|
||||
|
||||
Ok(HttpResponse::Ok().json(response))
|
||||
}
|
@ -1,26 +0,0 @@
|
||||
use actix_web::web;
|
||||
use actix_web::HttpResponse;
|
||||
use actix_web::get;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::helpers::Authentication;
|
||||
use crate::Data;
|
||||
|
||||
pub fn services(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(list);
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct KeysResponse {
|
||||
private: Option<String>,
|
||||
public: Option<String>,
|
||||
}
|
||||
|
||||
#[get("/keys", wrap = "Authentication::Admin")]
|
||||
async fn list(data: web::Data<Data>) -> HttpResponse {
|
||||
let api_keys = data.api_keys.clone();
|
||||
HttpResponse::Ok().json(KeysResponse {
|
||||
private: api_keys.private,
|
||||
public: api_keys.public,
|
||||
})
|
||||
}
|
@ -1,56 +0,0 @@
|
||||
use actix_web::{get, HttpResponse};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub mod document;
|
||||
pub mod health;
|
||||
pub mod index;
|
||||
pub mod key;
|
||||
pub mod search;
|
||||
pub mod setting;
|
||||
pub mod stats;
|
||||
pub mod stop_words;
|
||||
pub mod synonym;
|
||||
pub mod dump;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct IndexParam {
|
||||
index_uid: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexUpdateResponse {
|
||||
pub update_id: u64,
|
||||
}
|
||||
|
||||
impl IndexUpdateResponse {
|
||||
pub fn with_id(update_id: u64) -> Self {
|
||||
Self { update_id }
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the dashboard, should not be used in production. See [running]
|
||||
#[get("/")]
|
||||
pub async fn load_html() -> HttpResponse {
|
||||
HttpResponse::Ok()
|
||||
.content_type("text/html; charset=utf-8")
|
||||
.body(include_str!("../../public/interface.html").to_string())
|
||||
}
|
||||
|
||||
/// Always return a 200 with:
|
||||
/// ```json
|
||||
/// {
|
||||
/// "status": "Meilisearch is running"
|
||||
/// }
|
||||
/// ```
|
||||
#[get("/")]
|
||||
pub async fn running() -> HttpResponse {
|
||||
HttpResponse::Ok().json(serde_json::json!({ "status": "MeiliSearch is running" }))
|
||||
}
|
||||
|
||||
#[get("/bulma.min.css")]
|
||||
pub async fn load_css() -> HttpResponse {
|
||||
HttpResponse::Ok()
|
||||
.content_type("text/css; charset=utf-8")
|
||||
.body(include_str!("../../public/bulma.min.css").to_string())
|
||||
}
|
@ -1,270 +0,0 @@
|
||||
use std::collections::{HashMap, HashSet, BTreeSet};
|
||||
|
||||
use actix_web::{get, post, web, HttpResponse};
|
||||
use log::warn;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::error::{Error, FacetCountError, ResponseError};
|
||||
use crate::helpers::meilisearch::{IndexSearchExt, SearchResult};
|
||||
use crate::helpers::Authentication;
|
||||
use crate::routes::IndexParam;
|
||||
use crate::Data;
|
||||
|
||||
use meilisearch_core::facets::FacetFilter;
|
||||
use meilisearch_schema::{FieldId, Schema};
|
||||
|
||||
pub fn services(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(search_with_post).service(search_with_url_query);
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct SearchQuery {
|
||||
q: Option<String>,
|
||||
offset: Option<usize>,
|
||||
limit: Option<usize>,
|
||||
attributes_to_retrieve: Option<String>,
|
||||
attributes_to_crop: Option<String>,
|
||||
crop_length: Option<usize>,
|
||||
attributes_to_highlight: Option<String>,
|
||||
filters: Option<String>,
|
||||
matches: Option<bool>,
|
||||
facet_filters: Option<String>,
|
||||
facets_distribution: Option<String>,
|
||||
}
|
||||
|
||||
#[get("/indexes/{index_uid}/search", wrap = "Authentication::Public")]
|
||||
async fn search_with_url_query(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
params: web::Query<SearchQuery>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let search_result = params.search(&path.index_uid, data)?;
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct SearchQueryPost {
|
||||
q: Option<String>,
|
||||
offset: Option<usize>,
|
||||
limit: Option<usize>,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
attributes_to_crop: Option<Vec<String>>,
|
||||
crop_length: Option<usize>,
|
||||
attributes_to_highlight: Option<Vec<String>>,
|
||||
filters: Option<String>,
|
||||
matches: Option<bool>,
|
||||
facet_filters: Option<Value>,
|
||||
facets_distribution: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
impl From<SearchQueryPost> for SearchQuery {
|
||||
fn from(other: SearchQueryPost) -> SearchQuery {
|
||||
SearchQuery {
|
||||
q: other.q,
|
||||
offset: other.offset,
|
||||
limit: other.limit,
|
||||
attributes_to_retrieve: other.attributes_to_retrieve.map(|attrs| attrs.join(",")),
|
||||
attributes_to_crop: other.attributes_to_crop.map(|attrs| attrs.join(",")),
|
||||
crop_length: other.crop_length,
|
||||
attributes_to_highlight: other.attributes_to_highlight.map(|attrs| attrs.join(",")),
|
||||
filters: other.filters,
|
||||
matches: other.matches,
|
||||
facet_filters: other.facet_filters.map(|f| f.to_string()),
|
||||
facets_distribution: other.facets_distribution.map(|f| format!("{:?}", f)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[post("/indexes/{index_uid}/search", wrap = "Authentication::Public")]
|
||||
async fn search_with_post(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
params: web::Json<SearchQueryPost>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let query: SearchQuery = params.0.into();
|
||||
let search_result = query.search(&path.index_uid, data)?;
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
impl SearchQuery {
|
||||
fn search(
|
||||
&self,
|
||||
index_uid: &str,
|
||||
data: web::Data<Data>,
|
||||
) -> Result<SearchResult, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(index_uid)
|
||||
.ok_or(Error::index_not_found(index_uid))?;
|
||||
|
||||
let reader = data.db.main_read_txn()?;
|
||||
let schema = index
|
||||
.main
|
||||
.schema(&reader)?
|
||||
.ok_or(Error::internal("Impossible to retrieve the schema"))?;
|
||||
|
||||
let query = self
|
||||
.q
|
||||
.clone()
|
||||
.and_then(|q| if q.is_empty() { None } else { Some(q) });
|
||||
|
||||
let mut search_builder = index.new_search(query);
|
||||
|
||||
if let Some(offset) = self.offset {
|
||||
search_builder.offset(offset);
|
||||
}
|
||||
if let Some(limit) = self.limit {
|
||||
search_builder.limit(limit);
|
||||
}
|
||||
|
||||
let available_attributes = schema.displayed_names();
|
||||
let mut restricted_attributes: BTreeSet<&str>;
|
||||
match &self.attributes_to_retrieve {
|
||||
Some(attributes_to_retrieve) => {
|
||||
let attributes_to_retrieve: HashSet<&str> =
|
||||
attributes_to_retrieve.split(',').collect();
|
||||
if attributes_to_retrieve.contains("*") {
|
||||
restricted_attributes = available_attributes.clone();
|
||||
} else {
|
||||
restricted_attributes = BTreeSet::new();
|
||||
search_builder.attributes_to_retrieve(HashSet::new());
|
||||
for attr in attributes_to_retrieve {
|
||||
if available_attributes.contains(attr) {
|
||||
restricted_attributes.insert(attr);
|
||||
search_builder.add_retrievable_field(attr.to_string());
|
||||
} else {
|
||||
warn!("The attributes {:?} present in attributesToRetrieve parameter doesn't exist", attr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None => {
|
||||
restricted_attributes = available_attributes.clone();
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref facet_filters) = self.facet_filters {
|
||||
let attrs = index
|
||||
.main
|
||||
.attributes_for_faceting(&reader)?
|
||||
.unwrap_or_default();
|
||||
search_builder.add_facet_filters(FacetFilter::from_str(
|
||||
facet_filters,
|
||||
&schema,
|
||||
&attrs,
|
||||
)?);
|
||||
}
|
||||
|
||||
if let Some(facets) = &self.facets_distribution {
|
||||
match index.main.attributes_for_faceting(&reader)? {
|
||||
Some(ref attrs) => {
|
||||
let field_ids = prepare_facet_list(&facets, &schema, attrs)?;
|
||||
search_builder.add_facets(field_ids);
|
||||
}
|
||||
None => return Err(FacetCountError::NoFacetSet.into()),
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(attributes_to_crop) = &self.attributes_to_crop {
|
||||
let default_length = self.crop_length.unwrap_or(200);
|
||||
let mut final_attributes: HashMap<String, usize> = HashMap::new();
|
||||
|
||||
for attribute in attributes_to_crop.split(',') {
|
||||
let mut attribute = attribute.split(':');
|
||||
let attr = attribute.next();
|
||||
let length = attribute
|
||||
.next()
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(default_length);
|
||||
match attr {
|
||||
Some("*") => {
|
||||
for attr in &restricted_attributes {
|
||||
final_attributes.insert(attr.to_string(), length);
|
||||
}
|
||||
}
|
||||
Some(attr) => {
|
||||
if available_attributes.contains(attr) {
|
||||
final_attributes.insert(attr.to_string(), length);
|
||||
} else {
|
||||
warn!("The attributes {:?} present in attributesToCrop parameter doesn't exist", attr);
|
||||
}
|
||||
}
|
||||
None => (),
|
||||
}
|
||||
}
|
||||
search_builder.attributes_to_crop(final_attributes);
|
||||
}
|
||||
|
||||
if let Some(attributes_to_highlight) = &self.attributes_to_highlight {
|
||||
let mut final_attributes: HashSet<String> = HashSet::new();
|
||||
for attribute in attributes_to_highlight.split(',') {
|
||||
if attribute == "*" {
|
||||
for attr in &restricted_attributes {
|
||||
final_attributes.insert(attr.to_string());
|
||||
}
|
||||
} else if available_attributes.contains(attribute) {
|
||||
final_attributes.insert(attribute.to_string());
|
||||
} else {
|
||||
warn!("The attributes {:?} present in attributesToHighlight parameter doesn't exist", attribute);
|
||||
}
|
||||
}
|
||||
|
||||
search_builder.attributes_to_highlight(final_attributes);
|
||||
}
|
||||
|
||||
if let Some(filters) = &self.filters {
|
||||
search_builder.filters(filters.to_string());
|
||||
}
|
||||
|
||||
if let Some(matches) = self.matches {
|
||||
if matches {
|
||||
search_builder.get_matches();
|
||||
}
|
||||
}
|
||||
search_builder.search(&reader)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses the incoming string into an array of attributes for which to return a count. It returns
|
||||
/// a Vec of attribute names ascociated with their id.
|
||||
///
|
||||
/// An error is returned if the array is malformed, or if it contains attributes that are
|
||||
/// unexisting, or not set as facets.
|
||||
fn prepare_facet_list(
|
||||
facets: &str,
|
||||
schema: &Schema,
|
||||
facet_attrs: &[FieldId],
|
||||
) -> Result<Vec<(FieldId, String)>, FacetCountError> {
|
||||
let json_array = serde_json::from_str(facets)?;
|
||||
match json_array {
|
||||
Value::Array(vals) => {
|
||||
let wildcard = Value::String("*".to_string());
|
||||
if vals.iter().any(|f| f == &wildcard) {
|
||||
let attrs = facet_attrs
|
||||
.iter()
|
||||
.filter_map(|&id| schema.name(id).map(|n| (id, n.to_string())))
|
||||
.collect();
|
||||
return Ok(attrs);
|
||||
}
|
||||
let mut field_ids = Vec::with_capacity(facet_attrs.len());
|
||||
for facet in vals {
|
||||
match facet {
|
||||
Value::String(facet) => {
|
||||
if let Some(id) = schema.id(&facet) {
|
||||
if !facet_attrs.contains(&id) {
|
||||
return Err(FacetCountError::AttributeNotSet(facet));
|
||||
}
|
||||
field_ids.push((id, facet));
|
||||
}
|
||||
}
|
||||
bad_val => return Err(FacetCountError::unexpected_token(bad_val, &["String"])),
|
||||
}
|
||||
}
|
||||
Ok(field_ids)
|
||||
}
|
||||
bad_val => Err(FacetCountError::unexpected_token(bad_val, &["[String]"])),
|
||||
}
|
||||
}
|
@ -1,547 +0,0 @@
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
|
||||
use actix_web::{delete, get, post};
|
||||
use actix_web::{web, HttpResponse};
|
||||
use meilisearch_core::{MainReader, UpdateWriter};
|
||||
use meilisearch_core::settings::{Settings, SettingsUpdate, UpdateState, DEFAULT_RANKING_RULES};
|
||||
use meilisearch_schema::Schema;
|
||||
|
||||
use crate::Data;
|
||||
use crate::error::{Error, ResponseError};
|
||||
use crate::helpers::Authentication;
|
||||
use crate::routes::{IndexParam, IndexUpdateResponse};
|
||||
|
||||
pub fn services(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(update_all)
|
||||
.service(get_all)
|
||||
.service(delete_all)
|
||||
.service(get_rules)
|
||||
.service(update_rules)
|
||||
.service(delete_rules)
|
||||
.service(get_distinct)
|
||||
.service(update_distinct)
|
||||
.service(delete_distinct)
|
||||
.service(get_searchable)
|
||||
.service(update_searchable)
|
||||
.service(delete_searchable)
|
||||
.service(get_displayed)
|
||||
.service(update_displayed)
|
||||
.service(delete_displayed)
|
||||
.service(get_attributes_for_faceting)
|
||||
.service(delete_attributes_for_faceting)
|
||||
.service(update_attributes_for_faceting);
|
||||
}
|
||||
|
||||
pub fn update_all_settings_txn(
|
||||
data: &web::Data<Data>,
|
||||
settings: SettingsUpdate,
|
||||
index_uid: &str,
|
||||
write_txn: &mut UpdateWriter,
|
||||
) -> Result<u64, Error> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(index_uid)
|
||||
.ok_or(Error::index_not_found(index_uid))?;
|
||||
|
||||
let update_id = index.settings_update(write_txn, settings)?;
|
||||
Ok(update_id)
|
||||
}
|
||||
|
||||
#[post("/indexes/{index_uid}/settings", wrap = "Authentication::Private")]
|
||||
async fn update_all(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
body: web::Json<Settings>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let update_id = data.get_or_create_index(&path.index_uid, |index| {
|
||||
Ok(data.db.update_write::<_, _, ResponseError>(|writer| {
|
||||
let settings = body.into_inner().to_update().map_err(Error::bad_request)?;
|
||||
let update_id = index.settings_update(writer, settings)?;
|
||||
Ok(update_id)
|
||||
})?)
|
||||
})?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
||||
|
||||
pub fn get_all_sync(data: &web::Data<Data>, reader: &MainReader, index_uid: &str) -> Result<Settings, Error> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(index_uid)
|
||||
.ok_or(Error::index_not_found(index_uid))?;
|
||||
|
||||
let stop_words: BTreeSet<String> = index.main.stop_words(&reader)?.into_iter().collect();
|
||||
|
||||
let synonyms_list = index.main.synonyms(reader)?;
|
||||
|
||||
let mut synonyms = BTreeMap::new();
|
||||
let index_synonyms = &index.synonyms;
|
||||
for synonym in synonyms_list {
|
||||
let list = index_synonyms.synonyms(reader, synonym.as_bytes())?;
|
||||
synonyms.insert(synonym, list);
|
||||
}
|
||||
|
||||
let ranking_rules = index
|
||||
.main
|
||||
.ranking_rules(reader)?
|
||||
.unwrap_or(DEFAULT_RANKING_RULES.to_vec())
|
||||
.into_iter()
|
||||
.map(|r| r.to_string())
|
||||
.collect();
|
||||
|
||||
let schema = index.main.schema(&reader)?;
|
||||
|
||||
let distinct_attribute = match (index.main.distinct_attribute(reader)?, &schema) {
|
||||
(Some(id), Some(schema)) => schema.name(id).map(str::to_string),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let attributes_for_faceting = match (&schema, &index.main.attributes_for_faceting(&reader)?) {
|
||||
(Some(schema), Some(attrs)) => attrs
|
||||
.iter()
|
||||
.filter_map(|&id| schema.name(id))
|
||||
.map(str::to_string)
|
||||
.collect(),
|
||||
_ => vec![],
|
||||
};
|
||||
|
||||
let searchable_attributes = schema.as_ref().map(get_indexed_attributes);
|
||||
let displayed_attributes = schema.as_ref().map(get_displayed_attributes);
|
||||
|
||||
Ok(Settings {
|
||||
ranking_rules: Some(Some(ranking_rules)),
|
||||
distinct_attribute: Some(distinct_attribute),
|
||||
searchable_attributes: Some(searchable_attributes),
|
||||
displayed_attributes: Some(displayed_attributes),
|
||||
stop_words: Some(Some(stop_words)),
|
||||
synonyms: Some(Some(synonyms)),
|
||||
attributes_for_faceting: Some(Some(attributes_for_faceting)),
|
||||
})
|
||||
}
|
||||
|
||||
#[get("/indexes/{index_uid}/settings", wrap = "Authentication::Private")]
|
||||
async fn get_all(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let reader = data.db.main_read_txn()?;
|
||||
let settings = get_all_sync(&data, &reader, &path.index_uid)?;
|
||||
|
||||
Ok(HttpResponse::Ok().json(settings))
|
||||
}
|
||||
|
||||
#[delete("/indexes/{index_uid}/settings", wrap = "Authentication::Private")]
|
||||
async fn delete_all(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let settings = SettingsUpdate {
|
||||
ranking_rules: UpdateState::Clear,
|
||||
distinct_attribute: UpdateState::Clear,
|
||||
primary_key: UpdateState::Clear,
|
||||
searchable_attributes: UpdateState::Clear,
|
||||
displayed_attributes: UpdateState::Clear,
|
||||
stop_words: UpdateState::Clear,
|
||||
synonyms: UpdateState::Clear,
|
||||
attributes_for_faceting: UpdateState::Clear,
|
||||
};
|
||||
|
||||
let update_id = data
|
||||
.db
|
||||
.update_write(|w| index.settings_update(w, settings))?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
||||
|
||||
#[get(
|
||||
"/indexes/{index_uid}/settings/ranking-rules",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn get_rules(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
let reader = data.db.main_read_txn()?;
|
||||
|
||||
let ranking_rules = index
|
||||
.main
|
||||
.ranking_rules(&reader)?
|
||||
.unwrap_or(DEFAULT_RANKING_RULES.to_vec())
|
||||
.into_iter()
|
||||
.map(|r| r.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
Ok(HttpResponse::Ok().json(ranking_rules))
|
||||
}
|
||||
|
||||
#[post(
|
||||
"/indexes/{index_uid}/settings/ranking-rules",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn update_rules(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
body: web::Json<Option<Vec<String>>>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let update_id = data.get_or_create_index(&path.index_uid, |index| {
|
||||
let settings = Settings {
|
||||
ranking_rules: Some(body.into_inner()),
|
||||
..Settings::default()
|
||||
};
|
||||
|
||||
let settings = settings.to_update().map_err(Error::bad_request)?;
|
||||
Ok(data
|
||||
.db
|
||||
.update_write(|w| index.settings_update(w, settings))?)
|
||||
})?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
||||
|
||||
#[delete(
|
||||
"/indexes/{index_uid}/settings/ranking-rules",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn delete_rules(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let settings = SettingsUpdate {
|
||||
ranking_rules: UpdateState::Clear,
|
||||
..SettingsUpdate::default()
|
||||
};
|
||||
|
||||
let update_id = data
|
||||
.db
|
||||
.update_write(|w| index.settings_update(w, settings))?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
||||
|
||||
#[get(
|
||||
"/indexes/{index_uid}/settings/distinct-attribute",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn get_distinct(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
let reader = data.db.main_read_txn()?;
|
||||
let distinct_attribute_id = index.main.distinct_attribute(&reader)?;
|
||||
let schema = index.main.schema(&reader)?;
|
||||
let distinct_attribute = match (schema, distinct_attribute_id) {
|
||||
(Some(schema), Some(id)) => schema.name(id).map(str::to_string),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
Ok(HttpResponse::Ok().json(distinct_attribute))
|
||||
}
|
||||
|
||||
#[post(
|
||||
"/indexes/{index_uid}/settings/distinct-attribute",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn update_distinct(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
body: web::Json<Option<String>>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let update_id = data.get_or_create_index(&path.index_uid, |index| {
|
||||
let settings = Settings {
|
||||
distinct_attribute: Some(body.into_inner()),
|
||||
..Settings::default()
|
||||
};
|
||||
|
||||
let settings = settings.to_update().map_err(Error::bad_request)?;
|
||||
Ok(data
|
||||
.db
|
||||
.update_write(|w| index.settings_update(w, settings))?)
|
||||
})?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
||||
|
||||
#[delete(
|
||||
"/indexes/{index_uid}/settings/distinct-attribute",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn delete_distinct(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let settings = SettingsUpdate {
|
||||
distinct_attribute: UpdateState::Clear,
|
||||
..SettingsUpdate::default()
|
||||
};
|
||||
|
||||
let update_id = data
|
||||
.db
|
||||
.update_write(|w| index.settings_update(w, settings))?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
||||
|
||||
#[get(
|
||||
"/indexes/{index_uid}/settings/searchable-attributes",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn get_searchable(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
let reader = data.db.main_read_txn()?;
|
||||
let schema = index.main.schema(&reader)?;
|
||||
let searchable_attributes: Option<Vec<String>> = schema.as_ref().map(get_indexed_attributes);
|
||||
|
||||
Ok(HttpResponse::Ok().json(searchable_attributes))
|
||||
}
|
||||
|
||||
#[post(
|
||||
"/indexes/{index_uid}/settings/searchable-attributes",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn update_searchable(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
body: web::Json<Option<Vec<String>>>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let update_id = data.get_or_create_index(&path.index_uid, |index| {
|
||||
let settings = Settings {
|
||||
searchable_attributes: Some(body.into_inner()),
|
||||
..Settings::default()
|
||||
};
|
||||
|
||||
let settings = settings.to_update().map_err(Error::bad_request)?;
|
||||
|
||||
Ok(data
|
||||
.db
|
||||
.update_write(|w| index.settings_update(w, settings))?)
|
||||
})?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
||||
|
||||
#[delete(
|
||||
"/indexes/{index_uid}/settings/searchable-attributes",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn delete_searchable(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let settings = SettingsUpdate {
|
||||
searchable_attributes: UpdateState::Clear,
|
||||
..SettingsUpdate::default()
|
||||
};
|
||||
|
||||
let update_id = data
|
||||
.db
|
||||
.update_write(|w| index.settings_update(w, settings))?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
||||
|
||||
#[get(
|
||||
"/indexes/{index_uid}/settings/displayed-attributes",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn get_displayed(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
let reader = data.db.main_read_txn()?;
|
||||
|
||||
let schema = index.main.schema(&reader)?;
|
||||
|
||||
let displayed_attributes = schema.as_ref().map(get_displayed_attributes);
|
||||
|
||||
Ok(HttpResponse::Ok().json(displayed_attributes))
|
||||
}
|
||||
|
||||
#[post(
|
||||
"/indexes/{index_uid}/settings/displayed-attributes",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn update_displayed(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
body: web::Json<Option<BTreeSet<String>>>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let update_id = data.get_or_create_index(&path.index_uid, |index| {
|
||||
let settings = Settings {
|
||||
displayed_attributes: Some(body.into_inner()),
|
||||
..Settings::default()
|
||||
};
|
||||
|
||||
let settings = settings.to_update().map_err(Error::bad_request)?;
|
||||
Ok(data
|
||||
.db
|
||||
.update_write(|w| index.settings_update(w, settings))?)
|
||||
})?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
||||
|
||||
#[delete(
|
||||
"/indexes/{index_uid}/settings/displayed-attributes",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn delete_displayed(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let settings = SettingsUpdate {
|
||||
displayed_attributes: UpdateState::Clear,
|
||||
..SettingsUpdate::default()
|
||||
};
|
||||
|
||||
let update_id = data
|
||||
.db
|
||||
.update_write(|w| index.settings_update(w, settings))?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
||||
|
||||
#[get(
|
||||
"/indexes/{index_uid}/settings/attributes-for-faceting",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn get_attributes_for_faceting(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let attributes_for_faceting = data.db.main_read::<_, _, ResponseError>(|reader| {
|
||||
let schema = index.main.schema(reader)?;
|
||||
let attrs = index.main.attributes_for_faceting(reader)?;
|
||||
let attr_names = match (&schema, &attrs) {
|
||||
(Some(schema), Some(attrs)) => attrs
|
||||
.iter()
|
||||
.filter_map(|&id| schema.name(id))
|
||||
.map(str::to_string)
|
||||
.collect(),
|
||||
_ => vec![],
|
||||
};
|
||||
Ok(attr_names)
|
||||
})?;
|
||||
|
||||
Ok(HttpResponse::Ok().json(attributes_for_faceting))
|
||||
}
|
||||
|
||||
#[post(
|
||||
"/indexes/{index_uid}/settings/attributes-for-faceting",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn update_attributes_for_faceting(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
body: web::Json<Option<Vec<String>>>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let update_id = data.get_or_create_index(&path.index_uid, |index| {
|
||||
let settings = Settings {
|
||||
attributes_for_faceting: Some(body.into_inner()),
|
||||
..Settings::default()
|
||||
};
|
||||
|
||||
let settings = settings.to_update().map_err(Error::bad_request)?;
|
||||
Ok(data
|
||||
.db
|
||||
.update_write(|w| index.settings_update(w, settings))?)
|
||||
})?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
||||
|
||||
#[delete(
|
||||
"/indexes/{index_uid}/settings/attributes-for-faceting",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn delete_attributes_for_faceting(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let settings = SettingsUpdate {
|
||||
attributes_for_faceting: UpdateState::Clear,
|
||||
..SettingsUpdate::default()
|
||||
};
|
||||
|
||||
let update_id = data
|
||||
.db
|
||||
.update_write(|w| index.settings_update(w, settings))?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
||||
|
||||
fn get_indexed_attributes(schema: &Schema) -> Vec<String> {
|
||||
if schema.is_searchable_all() {
|
||||
vec!["*".to_string()]
|
||||
} else {
|
||||
schema
|
||||
.searchable_names()
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
fn get_displayed_attributes(schema: &Schema) -> BTreeSet<String> {
|
||||
if schema.is_displayed_all() {
|
||||
["*"].iter().map(|s| s.to_string()).collect()
|
||||
} else {
|
||||
schema
|
||||
.displayed_names()
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect()
|
||||
}
|
||||
}
|
@ -1,134 +0,0 @@
|
||||
use std::collections::{HashMap, BTreeMap};
|
||||
|
||||
use actix_web::web;
|
||||
use actix_web::HttpResponse;
|
||||
use actix_web::get;
|
||||
use chrono::{DateTime, Utc};
|
||||
use log::error;
|
||||
use serde::Serialize;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::error::{Error, ResponseError};
|
||||
use crate::helpers::Authentication;
|
||||
use crate::routes::IndexParam;
|
||||
use crate::Data;
|
||||
|
||||
pub fn services(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(index_stats)
|
||||
.service(get_stats)
|
||||
.service(get_version);
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct IndexStatsResponse {
|
||||
number_of_documents: u64,
|
||||
is_indexing: bool,
|
||||
fields_distribution: BTreeMap<String, usize>,
|
||||
}
|
||||
|
||||
#[get("/indexes/{index_uid}/stats", wrap = "Authentication::Private")]
|
||||
async fn index_stats(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let reader = data.db.main_read_txn()?;
|
||||
|
||||
let number_of_documents = index.main.number_of_documents(&reader)?;
|
||||
|
||||
let fields_distribution = index.main.fields_distribution(&reader)?.unwrap_or_default();
|
||||
|
||||
let update_reader = data.db.update_read_txn()?;
|
||||
|
||||
let is_indexing =
|
||||
data.db.is_indexing(&update_reader, &path.index_uid)?
|
||||
.ok_or(Error::internal(
|
||||
"Impossible to know if the database is indexing",
|
||||
))?;
|
||||
|
||||
Ok(HttpResponse::Ok().json(IndexStatsResponse {
|
||||
number_of_documents,
|
||||
is_indexing,
|
||||
fields_distribution,
|
||||
}))
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct StatsResult {
|
||||
database_size: u64,
|
||||
last_update: Option<DateTime<Utc>>,
|
||||
indexes: HashMap<String, IndexStatsResponse>,
|
||||
}
|
||||
|
||||
#[get("/stats", wrap = "Authentication::Private")]
|
||||
async fn get_stats(data: web::Data<Data>) -> Result<HttpResponse, ResponseError> {
|
||||
let mut index_list = HashMap::new();
|
||||
|
||||
let reader = data.db.main_read_txn()?;
|
||||
let update_reader = data.db.update_read_txn()?;
|
||||
|
||||
let indexes_set = data.db.indexes_uids();
|
||||
for index_uid in indexes_set {
|
||||
let index = data.db.open_index(&index_uid);
|
||||
match index {
|
||||
Some(index) => {
|
||||
let number_of_documents = index.main.number_of_documents(&reader)?;
|
||||
|
||||
let fields_distribution = index.main.fields_distribution(&reader)?.unwrap_or_default();
|
||||
|
||||
let is_indexing = data.db.is_indexing(&update_reader, &index_uid)?.ok_or(
|
||||
Error::internal("Impossible to know if the database is indexing"),
|
||||
)?;
|
||||
|
||||
let response = IndexStatsResponse {
|
||||
number_of_documents,
|
||||
is_indexing,
|
||||
fields_distribution,
|
||||
};
|
||||
index_list.insert(index_uid, response);
|
||||
}
|
||||
None => error!(
|
||||
"Index {:?} is referenced in the indexes list but cannot be found",
|
||||
index_uid
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
let database_size = WalkDir::new(&data.db_path)
|
||||
.into_iter()
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter_map(|entry| entry.metadata().ok())
|
||||
.filter(|metadata| metadata.is_file())
|
||||
.fold(0, |acc, m| acc + m.len());
|
||||
|
||||
let last_update = data.db.last_update(&reader)?;
|
||||
|
||||
Ok(HttpResponse::Ok().json(StatsResult {
|
||||
database_size,
|
||||
last_update,
|
||||
indexes: index_list,
|
||||
}))
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct VersionResponse {
|
||||
commit_sha: String,
|
||||
build_date: String,
|
||||
pkg_version: String,
|
||||
}
|
||||
|
||||
#[get("/version", wrap = "Authentication::Private")]
|
||||
async fn get_version() -> HttpResponse {
|
||||
HttpResponse::Ok().json(VersionResponse {
|
||||
commit_sha: env!("VERGEN_SHA").to_string(),
|
||||
build_date: env!("VERGEN_BUILD_TIMESTAMP").to_string(),
|
||||
pkg_version: env!("CARGO_PKG_VERSION").to_string(),
|
||||
})
|
||||
}
|
@ -1,79 +0,0 @@
|
||||
use actix_web::{web, HttpResponse};
|
||||
use actix_web::{delete, get, post};
|
||||
use meilisearch_core::settings::{SettingsUpdate, UpdateState};
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use crate::error::{Error, ResponseError};
|
||||
use crate::helpers::Authentication;
|
||||
use crate::routes::{IndexParam, IndexUpdateResponse};
|
||||
use crate::Data;
|
||||
|
||||
pub fn services(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(get).service(update).service(delete);
|
||||
}
|
||||
|
||||
#[get(
|
||||
"/indexes/{index_uid}/settings/stop-words",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn get(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
let reader = data.db.main_read_txn()?;
|
||||
let stop_words = index.main.stop_words(&reader)?;
|
||||
|
||||
Ok(HttpResponse::Ok().json(stop_words))
|
||||
}
|
||||
|
||||
#[post(
|
||||
"/indexes/{index_uid}/settings/stop-words",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn update(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
body: web::Json<BTreeSet<String>>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let update_id = data.get_or_create_index(&path.index_uid, |index| {
|
||||
let settings = SettingsUpdate {
|
||||
stop_words: UpdateState::Update(body.into_inner()),
|
||||
..SettingsUpdate::default()
|
||||
};
|
||||
|
||||
Ok(data
|
||||
.db
|
||||
.update_write(|w| index.settings_update(w, settings))?)
|
||||
})?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
||||
|
||||
#[delete(
|
||||
"/indexes/{index_uid}/settings/stop-words",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn delete(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let settings = SettingsUpdate {
|
||||
stop_words: UpdateState::Clear,
|
||||
..SettingsUpdate::default()
|
||||
};
|
||||
|
||||
let update_id = data
|
||||
.db
|
||||
.update_write(|w| index.settings_update(w, settings))?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
@ -1,90 +0,0 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use actix_web::{web, HttpResponse};
|
||||
use actix_web::{delete, get, post};
|
||||
use indexmap::IndexMap;
|
||||
use meilisearch_core::settings::{SettingsUpdate, UpdateState};
|
||||
|
||||
use crate::error::{Error, ResponseError};
|
||||
use crate::helpers::Authentication;
|
||||
use crate::routes::{IndexParam, IndexUpdateResponse};
|
||||
use crate::Data;
|
||||
|
||||
pub fn services(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(get).service(update).service(delete);
|
||||
}
|
||||
|
||||
#[get(
|
||||
"/indexes/{index_uid}/settings/synonyms",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn get(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let reader = data.db.main_read_txn()?;
|
||||
|
||||
let synonyms_list = index.main.synonyms(&reader)?;
|
||||
|
||||
let mut synonyms = IndexMap::new();
|
||||
let index_synonyms = &index.synonyms;
|
||||
for synonym in synonyms_list {
|
||||
let list = index_synonyms.synonyms(&reader, synonym.as_bytes())?;
|
||||
synonyms.insert(synonym, list);
|
||||
}
|
||||
|
||||
Ok(HttpResponse::Ok().json(synonyms))
|
||||
}
|
||||
|
||||
#[post(
|
||||
"/indexes/{index_uid}/settings/synonyms",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn update(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
body: web::Json<BTreeMap<String, Vec<String>>>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let update_id = data.get_or_create_index(&path.index_uid, |index| {
|
||||
let settings = SettingsUpdate {
|
||||
synonyms: UpdateState::Update(body.into_inner()),
|
||||
..SettingsUpdate::default()
|
||||
};
|
||||
|
||||
Ok(data
|
||||
.db
|
||||
.update_write(|w| index.settings_update(w, settings))?)
|
||||
})?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
||||
|
||||
#[delete(
|
||||
"/indexes/{index_uid}/settings/synonyms",
|
||||
wrap = "Authentication::Private"
|
||||
)]
|
||||
async fn delete(
|
||||
data: web::Data<Data>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = data
|
||||
.db
|
||||
.open_index(&path.index_uid)
|
||||
.ok_or(Error::index_not_found(&path.index_uid))?;
|
||||
|
||||
let settings = SettingsUpdate {
|
||||
synonyms: UpdateState::Clear,
|
||||
..SettingsUpdate::default()
|
||||
};
|
||||
|
||||
let update_id = data
|
||||
.db
|
||||
.update_write(|w| index.settings_update(w, settings))?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
|
||||
}
|
@ -1,106 +0,0 @@
|
||||
use crate::Data;
|
||||
use crate::error::Error;
|
||||
use crate::helpers::compression;
|
||||
|
||||
use log::error;
|
||||
use std::fs::create_dir_all;
|
||||
use std::path::Path;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
pub fn load_snapshot(
|
||||
db_path: &str,
|
||||
snapshot_path: &Path,
|
||||
ignore_snapshot_if_db_exists: bool,
|
||||
ignore_missing_snapshot: bool
|
||||
) -> Result<(), Error> {
|
||||
let db_path = Path::new(db_path);
|
||||
|
||||
if !db_path.exists() && snapshot_path.exists() {
|
||||
compression::from_tar_gz(snapshot_path, db_path)
|
||||
} else if db_path.exists() && !ignore_snapshot_if_db_exists {
|
||||
Err(Error::Internal(format!("database already exists at {:?}, try to delete it or rename it", db_path.canonicalize().unwrap_or(db_path.into()))))
|
||||
} else if !snapshot_path.exists() && !ignore_missing_snapshot {
|
||||
Err(Error::Internal(format!("snapshot doesn't exist at {:?}", snapshot_path.canonicalize().unwrap_or(snapshot_path.into()))))
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_snapshot(data: &Data, snapshot_dir: impl AsRef<Path>, snapshot_name: impl AsRef<str>) -> Result<(), Error> {
|
||||
create_dir_all(&snapshot_dir)?;
|
||||
let tmp_dir = tempfile::tempdir_in(&snapshot_dir)?;
|
||||
|
||||
data.db.copy_and_compact_to_path(tmp_dir.path())?;
|
||||
|
||||
let temp_snapshot_file = tempfile::NamedTempFile::new_in(&snapshot_dir)?;
|
||||
|
||||
compression::to_tar_gz(tmp_dir.path(), temp_snapshot_file.path())
|
||||
.map_err(|e| Error::Internal(format!("something went wrong during snapshot compression: {}", e)))?;
|
||||
|
||||
let snapshot_path = snapshot_dir.as_ref().join(snapshot_name.as_ref());
|
||||
|
||||
temp_snapshot_file.persist(snapshot_path).map_err(|e| Error::Internal(e.to_string()))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn schedule_snapshot(data: Data, snapshot_dir: &Path, time_gap_s: u64) -> Result<(), Error> {
|
||||
if snapshot_dir.file_name().is_none() {
|
||||
return Err(Error::Internal("invalid snapshot file path".to_string()));
|
||||
}
|
||||
let db_name = Path::new(&data.db_path).file_name().ok_or_else(|| Error::Internal("invalid database name".to_string()))?;
|
||||
create_dir_all(snapshot_dir)?;
|
||||
let snapshot_name = format!("{}.snapshot", db_name.to_str().unwrap_or("data.ms"));
|
||||
let snapshot_dir = snapshot_dir.to_owned();
|
||||
|
||||
thread::spawn(move || loop {
|
||||
if let Err(e) = create_snapshot(&data, &snapshot_dir, &snapshot_name) {
|
||||
error!("Unsuccessful snapshot creation: {}", e);
|
||||
}
|
||||
thread::sleep(Duration::from_secs(time_gap_s));
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Packs a small directory tree with `compression::to_tar_gz`, restores
    /// it through `load_snapshot` into a nested destination, and checks that
    /// the structure and the file contents survive the round trip.
    #[test]
    fn test_pack_unpack() {
        let workspace = tempfile::tempdir().unwrap();
        let root = workspace.path();

        let src_dir = root.join("src");
        let dest_dir = root.join("complex/destination/path/");
        let archive_path = root.join("archive.snapshot");

        let subdir_relative = Path::new("subdir/");
        // (relative path, expected contents) for every fixture file.
        let fixtures = [
            ("file1.txt", "Hello_file_1"),
            ("subdir/file2.txt", "Hello_file_2"),
        ];

        create_dir_all(src_dir.join(subdir_relative)).unwrap();
        for (relative, contents) in fixtures.iter() {
            fs::write(src_dir.join(relative), contents.as_bytes()).unwrap();
        }

        assert!(compression::to_tar_gz(&src_dir, &archive_path).is_ok());
        assert!(archive_path.exists());
        assert!(load_snapshot(dest_dir.to_str().unwrap(), &archive_path, false, false).is_ok());

        assert!(dest_dir.exists());
        assert!(dest_dir.join(subdir_relative).exists());
        for (relative, _) in fixtures.iter() {
            assert!(dest_dir.join(relative).exists());
        }

        for (relative, expected) in fixtures.iter() {
            let contents = fs::read_to_string(dest_dir.join(relative)).unwrap();
            assert_eq!(contents, *expected);
        }
    }
}
|
@ -1,12 +0,0 @@
|
||||
{
|
||||
"indices": [{
|
||||
"uid": "test",
|
||||
"primaryKey": "id"
|
||||
}, {
|
||||
"uid": "test2",
|
||||
"primaryKey": "test2_id"
|
||||
}
|
||||
],
|
||||
"dbVersion": "0.13.0",
|
||||
"dumpVersion": "1"
|
||||
}
|
@ -1,77 +0,0 @@
|
||||
{"id":0,"isActive":false,"balance":"$2,668.55","picture":"http://placehold.it/32x32","age":36,"color":"Green","name":"Lucas Hess","gender":"male","email":"lucashess@chorizon.com","phone":"+1 (998) 478-2597","address":"412 Losee Terrace, Blairstown, Georgia, 2825","about":"Mollit ad in exercitation quis. Anim est ut consequat fugiat duis magna aliquip velit nisi. Commodo eiusmod est consequat proident consectetur aliqua enim fugiat. Aliqua adipisicing laboris elit proident enim veniam laboris mollit. Incididunt fugiat minim ad nostrud deserunt tempor in. Id irure officia labore qui est labore nulla nisi. Magna sit quis tempor esse consectetur amet labore duis aliqua consequat.\r\n","registered":"2016-06-21T09:30:25 -02:00","latitude":-44.174957,"longitude":-145.725388,"tags":["bug","bug"]}
|
||||
{"id":1,"isActive":true,"balance":"$1,706.13","picture":"http://placehold.it/32x32","age":27,"color":"Green","name":"Cherry Orr","gender":"female","email":"cherryorr@chorizon.com","phone":"+1 (995) 479-3174","address":"442 Beverly Road, Ventress, New Mexico, 3361","about":"Exercitation officia mollit proident nostrud ea. Pariatur voluptate labore nostrud magna duis non elit et incididunt Lorem velit duis amet commodo. Irure in velit laboris pariatur. Do tempor ex deserunt duis minim amet.\r\n","registered":"2020-03-18T11:12:21 -01:00","latitude":-24.356932,"longitude":27.184808,"tags":["new issue","bug"]}
|
||||
{"id":2,"isActive":true,"balance":"$2,467.47","picture":"http://placehold.it/32x32","age":34,"color":"blue","name":"Patricia Goff","gender":"female","email":"patriciagoff@chorizon.com","phone":"+1 (864) 463-2277","address":"866 Hornell Loop, Cresaptown, Ohio, 1700","about":"Non culpa duis dolore Lorem aliqua. Labore veniam laborum cupidatat nostrud ea exercitation. Esse nostrud sit veniam laborum minim ullamco nulla aliqua est cillum magna. Duis non esse excepteur veniam voluptate sunt cupidatat nostrud consequat sint adipisicing ut excepteur. Incididunt sit aliquip non id magna amet deserunt esse quis dolor.\r\n","registered":"2014-10-28T12:59:30 -01:00","latitude":-64.008555,"longitude":11.867098,"tags":["good first issue"]}
|
||||
{"id":3,"isActive":true,"balance":"$3,344.40","picture":"http://placehold.it/32x32","age":35,"color":"blue","name":"Adeline Flynn","gender":"female","email":"adelineflynn@chorizon.com","phone":"+1 (994) 600-2840","address":"428 Paerdegat Avenue, Hollymead, Pennsylvania, 948","about":"Ex velit magna minim labore dolor id laborum incididunt. Proident dolor fugiat exercitation ad adipisicing amet dolore. Veniam nisi pariatur aute eu amet sint elit duis exercitation. Eu fugiat Lorem nostrud consequat aute sunt. Minim excepteur cillum laboris enim tempor adipisicing nulla reprehenderit ea velit Lorem qui in incididunt. Esse ipsum mollit deserunt ea exercitation ex aliqua anim magna cupidatat culpa.\r\n","registered":"2014-03-27T06:24:45 -01:00","latitude":-74.485173,"longitude":-11.059859,"tags":["bug","good first issue","wontfix","new issue"]}
|
||||
{"id":4,"isActive":false,"balance":"$2,575.78","picture":"http://placehold.it/32x32","age":39,"color":"Green","name":"Mariana Pacheco","gender":"female","email":"marianapacheco@chorizon.com","phone":"+1 (820) 414-2223","address":"664 Rapelye Street, Faywood, California, 7320","about":"Sint cillum enim eu Lorem dolore. Est excepteur cillum consequat incididunt. Ut consectetur et do culpa eiusmod ex ut id proident aliqua. Sunt dolor anim minim labore incididunt deserunt enim velit sunt ut in velit. Nulla ipsum cillum qui est minim officia in occaecat exercitation Lorem sunt. Aliqua minim excepteur tempor incididunt dolore. Quis amet ullamco et proident aliqua magna consequat.\r\n","registered":"2015-09-02T03:23:35 -02:00","latitude":75.763501,"longitude":-78.777124,"tags":["new issue"]}
|
||||
{"id":5,"isActive":true,"balance":"$3,793.09","picture":"http://placehold.it/32x32","age":20,"color":"Green","name":"Warren Watson","gender":"male","email":"warrenwatson@chorizon.com","phone":"+1 (807) 583-2427","address":"671 Prince Street, Faxon, Connecticut, 4275","about":"Cillum incididunt mollit labore ipsum elit ea. Lorem labore consectetur nulla ea fugiat sint esse cillum ea commodo id qui. Sint cillum mollit dolore enim quis esse. Nisi labore duis dolor tempor laborum laboris ad minim pariatur in excepteur sit. Aliqua anim amet sunt ullamco labore amet culpa irure esse eiusmod deserunt consequat Lorem nostrud.\r\n","registered":"2017-06-04T06:02:17 -02:00","latitude":29.979223,"longitude":25.358943,"tags":["wontfix","wontfix","wontfix"]}
|
||||
{"id":6,"isActive":true,"balance":"$2,919.70","picture":"http://placehold.it/32x32","age":20,"color":"blue","name":"Shelia Berry","gender":"female","email":"sheliaberry@chorizon.com","phone":"+1 (853) 511-2651","address":"437 Forrest Street, Coventry, Illinois, 2056","about":"Id occaecat qui voluptate proident culpa cillum nisi reprehenderit. Pariatur nostrud proident adipisicing reprehenderit eiusmod qui minim proident aliqua id cupidatat laboris deserunt. Proident sint laboris sit mollit dolor qui incididunt quis veniam cillum cupidatat ad nostrud ut. Aliquip consequat eiusmod eiusmod irure tempor do incididunt id culpa laboris eiusmod.\r\n","registered":"2018-07-11T02:45:01 -02:00","latitude":54.815991,"longitude":-118.690609,"tags":["good first issue","bug","wontfix","new issue"]}
|
||||
{"id":7,"isActive":true,"balance":"$1,349.50","picture":"http://placehold.it/32x32","age":28,"color":"Green","name":"Chrystal Boyd","gender":"female","email":"chrystalboyd@chorizon.com","phone":"+1 (936) 563-2802","address":"670 Croton Loop, Sussex, Florida, 4692","about":"Consequat ex voluptate consectetur laborum nulla. Qui voluptate Lorem amet labore est esse sunt. Nulla cupidatat consequat quis incididunt exercitation aliquip reprehenderit ea ea adipisicing reprehenderit id consectetur quis. Exercitation est incididunt ullamco non proident consequat. Nisi veniam aliquip fugiat voluptate ex id aute duis ullamco magna ipsum ad laborum ipsum. Cupidatat velit dolore esse nisi.\r\n","registered":"2016-11-01T07:36:04 -01:00","latitude":-24.711933,"longitude":147.246705,"tags":[]}
|
||||
{"id":8,"isActive":false,"balance":"$3,999.56","picture":"http://placehold.it/32x32","age":30,"color":"brown","name":"Martin Porter","gender":"male","email":"martinporter@chorizon.com","phone":"+1 (895) 580-2304","address":"577 Regent Place, Aguila, Guam, 6554","about":"Nostrud nulla labore ex excepteur labore enim cillum pariatur in do Lorem eiusmod ullamco est. Labore aliquip id ut nisi commodo pariatur ea esse laboris. Incididunt eu dolor esse excepteur nulla minim proident non cillum nisi dolore incididunt ipsum tempor.\r\n","registered":"2014-09-20T02:08:30 -02:00","latitude":-88.344273,"longitude":37.964466,"tags":[]}
|
||||
{"id":9,"isActive":true,"balance":"$3,729.71","picture":"http://placehold.it/32x32","age":26,"color":"blue","name":"Kelli Mendez","gender":"female","email":"kellimendez@chorizon.com","phone":"+1 (936) 401-2236","address":"242 Caton Place, Grazierville, Alabama, 3968","about":"Consectetur occaecat dolore esse eiusmod enim ea aliqua eiusmod amet velit laborum. Velit quis consequat consectetur velit fugiat labore commodo amet do. Magna minim est ad commodo consequat fugiat. Laboris duis Lorem ipsum irure sit ipsum consequat tempor sit. Est ad nulla duis quis velit anim id nulla. Cupidatat ea esse laboris eu veniam cupidatat proident veniam quis.\r\n","registered":"2018-05-04T10:35:30 -02:00","latitude":49.37551,"longitude":41.872323,"tags":["new issue","new issue"]}
|
||||
{"id":10,"isActive":false,"balance":"$1,127.47","picture":"http://placehold.it/32x32","age":27,"color":"blue","name":"Maddox Johns","gender":"male","email":"maddoxjohns@chorizon.com","phone":"+1 (892) 470-2357","address":"756 Beard Street, Avalon, Louisiana, 114","about":"Voluptate et dolor magna do do. Id do enim ut nulla esse culpa fugiat excepteur quis. Nostrud ad aliquip aliqua qui esse ut consequat proident deserunt esse cupidatat do elit fugiat. Sint cillum aliquip cillum laboris laborum laboris ad aliquip enim reprehenderit cillum eu sint. Sint ut ad duis do culpa non eiusmod amet non ipsum commodo. Pariatur aliquip sit deserunt non. Ut consequat pariatur deserunt veniam est sit eiusmod officia aliquip commodo sunt in eu duis.\r\n","registered":"2016-04-22T06:41:25 -02:00","latitude":66.640229,"longitude":-17.222666,"tags":["new issue","good first issue","good first issue","new issue"]}
|
||||
{"id":11,"isActive":true,"balance":"$1,351.43","picture":"http://placehold.it/32x32","age":28,"color":"Green","name":"Evans Wagner","gender":"male","email":"evanswagner@chorizon.com","phone":"+1 (889) 496-2332","address":"118 Monaco Place, Lutsen, Delaware, 6209","about":"Sunt consectetur enim ipsum consectetur occaecat reprehenderit nulla pariatur. Cupidatat do exercitation tempor voluptate duis nostrud dolor consectetur. Excepteur aliquip Lorem voluptate cillum est. Nisi velit nulla nostrud ea id officia laboris et.\r\n","registered":"2016-10-27T01:26:31 -02:00","latitude":-77.673222,"longitude":-142.657214,"tags":["good first issue","good first issue"]}
|
||||
{"id":12,"isActive":false,"balance":"$3,394.96","picture":"http://placehold.it/32x32","age":25,"color":"blue","name":"Aida Kirby","gender":"female","email":"aidakirby@chorizon.com","phone":"+1 (942) 532-2325","address":"797 Engert Avenue, Wilsonia, Idaho, 6532","about":"Mollit aute esse Lorem do laboris anim reprehenderit excepteur. Ipsum culpa esse voluptate officia cupidatat minim. Velit officia proident nostrud sunt irure labore. Culpa ex commodo amet dolor amet voluptate Lorem ex esse commodo fugiat quis non. Ex est adipisicing veniam sunt dolore ut aliqua nisi ex sit. Esse voluptate esse anim id adipisicing enim aute ea exercitation tempor cillum.\r\n","registered":"2018-06-18T04:39:57 -02:00","latitude":-58.062041,"longitude":34.999254,"tags":["new issue","wontfix","bug","new issue"]}
|
||||
{"id":13,"isActive":true,"balance":"$2,812.62","picture":"http://placehold.it/32x32","age":40,"color":"blue","name":"Nelda Burris","gender":"female","email":"neldaburris@chorizon.com","phone":"+1 (813) 600-2576","address":"160 Opal Court, Fowlerville, Tennessee, 2170","about":"Ipsum aliquip adipisicing elit magna. Veniam irure quis laborum laborum sint velit amet. Irure non eiusmod laborum fugiat qui quis Lorem culpa veniam commodo. Fugiat cupidatat dolore et consequat pariatur enim ex velit consequat deserunt quis. Deserunt et quis laborum cupidatat cillum minim cupidatat nisi do commodo commodo labore cupidatat ea. In excepteur sit nostrud nulla nostrud dolor sint. Et anim culpa aliquip laborum Lorem elit.\r\n","registered":"2015-08-15T12:39:53 -02:00","latitude":66.6871,"longitude":179.549488,"tags":["wontfix"]}
|
||||
{"id":14,"isActive":true,"balance":"$1,718.33","picture":"http://placehold.it/32x32","age":35,"color":"blue","name":"Jennifer Hart","gender":"female","email":"jenniferhart@chorizon.com","phone":"+1 (850) 537-2513","address":"124 Veranda Place, Nash, Utah, 985","about":"Amet amet voluptate in occaecat pariatur. Nulla ipsum esse quis qui in quis qui. Non est non nisi qui tempor commodo consequat fugiat. Sint eu ipsum aute anim anim. Ea nostrud excepteur exercitation consectetur Lorem.\r\n","registered":"2016-09-04T11:46:59 -02:00","latitude":-66.827751,"longitude":99.220079,"tags":["wontfix","bug","new issue","new issue"]}
|
||||
{"id":15,"isActive":false,"balance":"$2,698.16","picture":"http://placehold.it/32x32","age":28,"color":"blue","name":"Aurelia Contreras","gender":"female","email":"aureliacontreras@chorizon.com","phone":"+1 (932) 442-3103","address":"655 Dwight Street, Grapeview, Palau, 8356","about":"Qui adipisicing consectetur aute veniam culpa ipsum. Occaecat occaecat ut mollit enim enim elit Lorem nostrud Lorem. Consequat laborum mollit nulla aute cillum sunt mollit commodo velit culpa. Pariatur pariatur velit nostrud tempor. In minim enim cillum exercitation in laboris labore ea sunt in incididunt fugiat.\r\n","registered":"2014-09-11T10:43:15 -02:00","latitude":-71.328973,"longitude":133.404895,"tags":["wontfix","bug","good first issue"]}
|
||||
{"id":16,"isActive":true,"balance":"$3,303.25","picture":"http://placehold.it/32x32","age":28,"color":"brown","name":"Estella Bass","gender":"female","email":"estellabass@chorizon.com","phone":"+1 (825) 436-2909","address":"435 Rockwell Place, Garberville, Wisconsin, 2230","about":"Sit eiusmod mollit velit non. Qui ea in exercitation elit reprehenderit occaecat tempor minim officia. Culpa amet voluptate sit eiusmod pariatur.\r\n","registered":"2017-11-23T09:32:09 -01:00","latitude":81.17014,"longitude":-145.262693,"tags":["new issue"]}
|
||||
{"id":17,"isActive":false,"balance":"$3,579.20","picture":"http://placehold.it/32x32","age":25,"color":"brown","name":"Ortega Brennan","gender":"male","email":"ortegabrennan@chorizon.com","phone":"+1 (906) 526-2287","address":"440 Berry Street, Rivera, Maine, 1849","about":"Veniam velit non laboris consectetur sit aliquip enim proident velit in ipsum reprehenderit reprehenderit. Dolor qui nulla adipisicing ad magna dolore do ut duis et aute est. Qui est elit cupidatat nostrud. Laboris voluptate reprehenderit minim sint exercitation cupidatat ipsum sint consectetur velit sunt et officia incididunt. Ut amet Lorem minim deserunt officia officia irure qui et Lorem deserunt culpa sit.\r\n","registered":"2016-03-31T02:17:13 -02:00","latitude":-68.407524,"longitude":-113.642067,"tags":["new issue","wontfix"]}
|
||||
{"id":18,"isActive":false,"balance":"$1,484.92","picture":"http://placehold.it/32x32","age":39,"color":"blue","name":"Leonard Tillman","gender":"male","email":"leonardtillman@chorizon.com","phone":"+1 (864) 541-3456","address":"985 Provost Street, Charco, New Hampshire, 8632","about":"Consectetur ut magna sit id officia nostrud ipsum. Lorem cupidatat laborum nostrud aliquip magna qui est cupidatat exercitation et. Officia qui magna commodo id cillum magna ut ad veniam sunt sint ex. Id minim do in do exercitation aliquip incididunt ex esse. Nisi aliqua quis excepteur qui aute excepteur dolore eu pariatur irure id eu cupidatat eiusmod. Aliqua amet et dolore enim et eiusmod qui irure pariatur qui officia adipisicing nulla duis.\r\n","registered":"2018-05-06T08:21:27 -02:00","latitude":-8.581801,"longitude":-61.910062,"tags":["wontfix","new issue","bug","bug"]}
|
||||
{"id":19,"isActive":true,"balance":"$3,572.55","picture":"http://placehold.it/32x32","age":33,"color":"brown","name":"Dale Payne","gender":"male","email":"dalepayne@chorizon.com","phone":"+1 (814) 469-3499","address":"536 Dare Court, Ironton, Arkansas, 8605","about":"Et velit cupidatat velit incididunt mollit. Occaecat do labore aliqua dolore excepteur occaecat ut veniam ad ullamco tempor. Ut anim laboris deserunt culpa esse. Pariatur Lorem nulla cillum cupidatat nostrud Lorem commodo reprehenderit ut est. In dolor cillum reprehenderit laboris incididunt ad reprehenderit aute ipsum officia id in consequat. Culpa exercitation voluptate fugiat est Lorem ipsum in dolore dolor consequat Lorem et.\r\n","registered":"2019-10-11T01:01:33 -02:00","latitude":-18.280968,"longitude":-126.091797,"tags":["bug","wontfix","wontfix","wontfix"]}
|
||||
{"id":20,"isActive":true,"balance":"$1,986.48","picture":"http://placehold.it/32x32","age":38,"color":"Green","name":"Florence Long","gender":"female","email":"florencelong@chorizon.com","phone":"+1 (972) 557-3858","address":"519 Hendrickson Street, Templeton, Hawaii, 2389","about":"Quis officia occaecat veniam veniam. Ex minim enim labore cupidatat qui. Proident esse deserunt laborum laboris sunt nostrud.\r\n","registered":"2016-05-02T09:18:59 -02:00","latitude":-27.110866,"longitude":-45.09445,"tags":[]}
|
||||
{"id":21,"isActive":true,"balance":"$1,440.09","picture":"http://placehold.it/32x32","age":40,"color":"blue","name":"Levy Whitley","gender":"male","email":"levywhitley@chorizon.com","phone":"+1 (911) 458-2411","address":"187 Thomas Street, Hachita, North Carolina, 2989","about":"Velit laboris non minim elit sint deserunt fugiat. Aute minim ex commodo aute cillum aliquip fugiat pariatur nulla eiusmod pariatur consectetur. Qui ex ea qui laborum veniam adipisicing magna minim ut. In irure anim voluptate mollit et. Adipisicing labore ea mollit magna aliqua culpa velit est. Excepteur nisi veniam enim velit in ad officia irure laboris.\r\n","registered":"2014-04-30T07:31:38 -02:00","latitude":-6.537315,"longitude":171.813536,"tags":["bug"]}
|
||||
{"id":22,"isActive":false,"balance":"$2,938.57","picture":"http://placehold.it/32x32","age":35,"color":"blue","name":"Bernard Mcfarland","gender":"male","email":"bernardmcfarland@chorizon.com","phone":"+1 (979) 442-3386","address":"409 Hall Street, Keyport, Federated States Of Micronesia, 7011","about":"Reprehenderit irure aute et anim ullamco enim est tempor id ipsum mollit veniam aute ullamco. Consectetur dolor velit tempor est reprehenderit ut id non est ullamco voluptate. Commodo aute ullamco culpa non voluptate incididunt non culpa culpa nisi id proident cupidatat.\r\n","registered":"2017-08-10T10:07:59 -02:00","latitude":63.766795,"longitude":68.177069,"tags":[]}
|
||||
{"id":23,"isActive":true,"balance":"$1,678.49","picture":"http://placehold.it/32x32","age":31,"color":"brown","name":"Blanca Mcclain","gender":"female","email":"blancamcclain@chorizon.com","phone":"+1 (976) 439-2772","address":"176 Crooke Avenue, Valle, Virginia, 5373","about":"Aliquip sunt irure ut consectetur elit. Cillum amet incididunt et anim elit in incididunt adipisicing fugiat veniam esse veniam. Nisi qui sit occaecat tempor nostrud est aute cillum anim excepteur laboris magna in. Fugiat fugiat veniam cillum laborum ut pariatur amet nulla nulla. Nostrud mollit in laborum minim exercitation aute. Lorem aute ipsum laboris est adipisicing qui ullamco tempor adipisicing cupidatat mollit.\r\n","registered":"2015-10-12T11:57:28 -02:00","latitude":-8.944564,"longitude":-150.711709,"tags":["bug","wontfix","good first issue"]}
|
||||
{"id":24,"isActive":true,"balance":"$2,276.87","picture":"http://placehold.it/32x32","age":28,"color":"brown","name":"Espinoza Ford","gender":"male","email":"espinozaford@chorizon.com","phone":"+1 (945) 429-3975","address":"137 Bowery Street, Itmann, District Of Columbia, 1864","about":"Deserunt nisi aliquip esse occaecat laborum qui aliqua excepteur ea cupidatat dolore magna consequat. Culpa aliquip cillum incididunt proident est officia consequat duis. Elit tempor ut cupidatat nisi ea sint non labore aliquip amet. Deserunt labore cupidatat laboris dolor duis occaecat velit aliquip reprehenderit esse. Sit ad qui consectetur id anim nisi amet eiusmod.\r\n","registered":"2014-03-26T02:16:08 -01:00","latitude":-37.137666,"longitude":-51.811757,"tags":["wontfix","bug"]}
|
||||
{"id":25,"isActive":true,"balance":"$3,973.43","picture":"http://placehold.it/32x32","age":29,"color":"Green","name":"Sykes Conley","gender":"male","email":"sykesconley@chorizon.com","phone":"+1 (851) 401-3916","address":"345 Grand Street, Woodlands, Missouri, 4461","about":"Pariatur ullamco duis reprehenderit ad sit dolore. Dolore ex fugiat labore incididunt nostrud. Minim deserunt officia sunt enim magna elit veniam reprehenderit nisi cupidatat dolor eiusmod. Veniam laboris sint cillum et laboris nostrud culpa laboris anim. Incididunt velit pariatur cupidatat sit dolore in. Voluptate consectetur officia id nostrud velit mollit dolor. Id laboris consectetur culpa sunt pariatur minim sunt laboris sit.\r\n","registered":"2015-09-12T06:03:56 -02:00","latitude":67.282955,"longitude":-64.341323,"tags":["wontfix"]}
|
||||
{"id":26,"isActive":false,"balance":"$1,431.50","picture":"http://placehold.it/32x32","age":35,"color":"blue","name":"Barlow Duran","gender":"male","email":"barlowduran@chorizon.com","phone":"+1 (995) 436-2562","address":"481 Everett Avenue, Allison, Nebraska, 3065","about":"Proident quis eu officia adipisicing aliquip. Lorem laborum magna dolor et incididunt cillum excepteur et amet. Veniam consectetur officia fugiat magna consequat dolore elit aute exercitation fugiat excepteur ullamco. Sit qui proident reprehenderit ea ad qui culpa exercitation reprehenderit anim cupidatat. Nulla et duis Lorem cillum duis pariatur amet voluptate labore ut aliqua mollit anim ea. Nostrud incididunt et proident adipisicing non consequat tempor ullamco adipisicing incididunt. Incididunt cupidatat tempor fugiat officia qui eiusmod reprehenderit.\r\n","registered":"2017-06-29T04:28:43 -02:00","latitude":-38.70606,"longitude":55.02816,"tags":["new issue"]}
|
||||
{"id":27,"isActive":true,"balance":"$3,478.27","picture":"http://placehold.it/32x32","age":31,"color":"blue","name":"Schwartz Morgan","gender":"male","email":"schwartzmorgan@chorizon.com","phone":"+1 (861) 507-2067","address":"451 Lincoln Road, Fairlee, Washington, 2717","about":"Labore eiusmod sint dolore sunt eiusmod esse et in id aliquip. Aliqua consequat occaecat laborum labore ipsum enim non nostrud adipisicing adipisicing cillum occaecat. Duis minim est culpa sunt nulla ullamco adipisicing magna irure. Occaecat quis irure eiusmod fugiat quis commodo reprehenderit labore cillum commodo id et.\r\n","registered":"2016-05-10T08:34:54 -02:00","latitude":-75.886403,"longitude":93.044471,"tags":["bug","bug","wontfix","wontfix"]}
|
||||
{"id":28,"isActive":true,"balance":"$2,825.59","picture":"http://placehold.it/32x32","age":32,"color":"blue","name":"Kristy Leon","gender":"female","email":"kristyleon@chorizon.com","phone":"+1 (948) 465-2563","address":"594 Macon Street, Floris, South Dakota, 3565","about":"Proident veniam voluptate magna id do. Laboris enim dolor culpa quis. Esse voluptate elit commodo duis incididunt velit aliqua. Qui aute commodo incididunt elit eu Lorem dolore. Non esse duis do reprehenderit culpa minim. Ullamco consequat id do exercitation exercitation mollit ipsum velit eiusmod quis.\r\n","registered":"2014-12-14T04:10:29 -01:00","latitude":-50.01615,"longitude":-68.908804,"tags":["wontfix","good first issue"]}
|
||||
{"id":29,"isActive":false,"balance":"$3,028.03","picture":"http://placehold.it/32x32","age":39,"color":"blue","name":"Ashley Pittman","gender":"male","email":"ashleypittman@chorizon.com","phone":"+1 (928) 507-3523","address":"646 Adelphi Street, Clara, Colorado, 6056","about":"Incididunt cillum consectetur nulla sit sit labore nulla sit. Ullamco nisi mollit reprehenderit tempor irure in Lorem duis. Sunt eu aute laboris dolore commodo ipsum sint cupidatat veniam amet culpa incididunt aute ad. Quis dolore aliquip id aute mollit eiusmod nisi ipsum ut labore adipisicing do culpa.\r\n","registered":"2016-01-07T10:40:48 -01:00","latitude":-58.766037,"longitude":-124.828485,"tags":["wontfix"]}
|
||||
{"id":30,"isActive":true,"balance":"$2,021.11","picture":"http://placehold.it/32x32","age":32,"color":"blue","name":"Stacy Espinoza","gender":"female","email":"stacyespinoza@chorizon.com","phone":"+1 (999) 487-3253","address":"931 Alabama Avenue, Bangor, Alaska, 8215","about":"Id reprehenderit cupidatat exercitation anim ad nisi irure. Minim est proident mollit laborum. Duis ad duis eiusmod quis.\r\n","registered":"2014-07-16T06:15:53 -02:00","latitude":41.560197,"longitude":177.697,"tags":["new issue","new issue","bug"]}
|
||||
{"id":31,"isActive":false,"balance":"$3,609.82","picture":"http://placehold.it/32x32","age":32,"color":"blue","name":"Vilma Garza","gender":"female","email":"vilmagarza@chorizon.com","phone":"+1 (944) 585-2021","address":"565 Tech Place, Sedley, Puerto Rico, 858","about":"Excepteur et fugiat mollit incididunt cupidatat. Mollit nisi veniam sint eu exercitation amet labore. Voluptate est magna est amet qui minim excepteur cupidatat dolor quis id excepteur aliqua reprehenderit. Proident nostrud ex veniam officia nisi enim occaecat ex magna officia id consectetur ad eu. In et est reprehenderit cupidatat ad minim veniam proident nulla elit nisi veniam proident ex. Eu in irure sit veniam amet incididunt fugiat proident quis ullamco laboris.\r\n","registered":"2017-06-30T07:43:52 -02:00","latitude":-12.574889,"longitude":-54.771186,"tags":["new issue","wontfix","wontfix"]}
|
||||
{"id":32,"isActive":false,"balance":"$2,882.34","picture":"http://placehold.it/32x32","age":38,"color":"brown","name":"June Dunlap","gender":"female","email":"junedunlap@chorizon.com","phone":"+1 (997) 504-2937","address":"353 Cozine Avenue, Goodville, Indiana, 1438","about":"Non dolore ut Lorem dolore amet veniam fugiat reprehenderit ut amet ea ut. Non aliquip cillum ad occaecat non et sint quis proident velit laborum ullamco et. Quis qui tempor eu voluptate et proident duis est commodo laboris ex enim. Nisi aliquip laboris nostrud veniam aliqua ullamco. Et officia proident dolor aliqua incididunt veniam proident.\r\n","registered":"2016-08-23T08:54:11 -02:00","latitude":-27.883363,"longitude":-163.919683,"tags":["new issue","new issue","bug","wontfix"]}
|
||||
{"id":33,"isActive":true,"balance":"$3,556.54","picture":"http://placehold.it/32x32","age":33,"color":"brown","name":"Cecilia Greer","gender":"female","email":"ceciliagreer@chorizon.com","phone":"+1 (977) 573-3498","address":"696 Withers Street, Lydia, Oklahoma, 3220","about":"Dolor pariatur veniam ad enim eiusmod fugiat ullamco nulla veniam. Dolore dolor sit excepteur veniam adipisicing adipisicing excepteur commodo qui reprehenderit magna exercitation enim reprehenderit. Cupidatat eu ullamco excepteur sint do. Et cupidatat ex adipisicing veniam eu tempor reprehenderit ut eiusmod amet proident veniam nostrud. Tempor ex enim mollit laboris magna tempor. Et aliqua nostrud esse pariatur quis. Ut pariatur ea ipsum pariatur.\r\n","registered":"2017-01-13T11:30:12 -01:00","latitude":60.467215,"longitude":84.684575,"tags":["wontfix","good first issue","good first issue","wontfix"]}
|
||||
{"id":34,"isActive":true,"balance":"$1,413.35","picture":"http://placehold.it/32x32","age":33,"color":"brown","name":"Mckay Schroeder","gender":"male","email":"mckayschroeder@chorizon.com","phone":"+1 (816) 480-3657","address":"958 Miami Court, Rehrersburg, Northern Mariana Islands, 567","about":"Amet do velit excepteur tempor sit eu voluptate. Excepteur amet culpa ipsum in pariatur mollit amet nisi veniam. Laboris elit consectetur id anim qui laboris. Reprehenderit mollit laboris occaecat esse sunt Lorem Lorem sunt occaecat.\r\n","registered":"2016-02-08T04:50:15 -01:00","latitude":-72.413287,"longitude":-159.254371,"tags":["good first issue"]}
|
||||
{"id":35,"isActive":true,"balance":"$2,306.53","picture":"http://placehold.it/32x32","age":34,"color":"blue","name":"Sawyer Mccormick","gender":"male","email":"sawyermccormick@chorizon.com","phone":"+1 (829) 569-3012","address":"749 Apollo Street, Eastvale, Texas, 7373","about":"Est irure ex occaecat aute. Lorem ad ullamco esse cillum deserunt qui proident anim officia dolore. Incididunt tempor cupidatat nulla cupidatat ullamco reprehenderit Lorem. Laboris tempor do pariatur sint non officia id qui deserunt amet Lorem pariatur consectetur exercitation. Adipisicing reprehenderit pariatur duis ex cupidatat cillum ad laboris ex. Sunt voluptate pariatur esse amet dolore minim aliquip reprehenderit nisi velit mollit.\r\n","registered":"2019-11-30T11:53:23 -01:00","latitude":-48.978194,"longitude":110.950191,"tags":["good first issue","new issue","new issue","bug"]}
|
||||
{"id":36,"isActive":false,"balance":"$1,844.54","picture":"http://placehold.it/32x32","age":37,"color":"brown","name":"Barbra Valenzuela","gender":"female","email":"barbravalenzuela@chorizon.com","phone":"+1 (992) 512-2649","address":"617 Schenck Court, Reinerton, Michigan, 2908","about":"Deserunt adipisicing nisi et amet aliqua amet. Veniam occaecat et elit excepteur veniam. Aute irure culpa nostrud occaecat. Excepteur sit aute mollit commodo. Do ex pariatur consequat sint Lorem veniam laborum excepteur. Non voluptate ex laborum enim irure. Adipisicing excepteur anim elit esse.\r\n","registered":"2019-03-29T01:59:31 -01:00","latitude":45.193723,"longitude":-12.486778,"tags":["new issue","new issue","wontfix","wontfix"]}
|
||||
{"id":37,"isActive":false,"balance":"$3,469.82","picture":"http://placehold.it/32x32","age":39,"color":"brown","name":"Opal Weiss","gender":"female","email":"opalweiss@chorizon.com","phone":"+1 (809) 400-3079","address":"535 Bogart Street, Frizzleburg, Arizona, 5222","about":"Reprehenderit nostrud minim adipisicing voluptate nisi consequat id sint. Proident tempor est esse cupidatat minim irure esse do do sint dolor. In officia duis et voluptate Lorem minim cupidatat ipsum enim qui dolor quis in Lorem. Aliquip commodo ex quis exercitation reprehenderit. Lorem id reprehenderit cillum adipisicing sunt ipsum incididunt incididunt.\r\n","registered":"2019-09-04T07:22:28 -02:00","latitude":72.50376,"longitude":61.656435,"tags":["bug","bug","good first issue","good first issue"]}
|
||||
{"id":38,"isActive":true,"balance":"$1,992.38","picture":"http://placehold.it/32x32","age":40,"color":"Green","name":"Christina Short","gender":"female","email":"christinashort@chorizon.com","phone":"+1 (884) 589-2705","address":"594 Willmohr Street, Dexter, Montana, 660","about":"Quis commodo eu dolor incididunt. Nisi magna mollit nostrud do consequat irure exercitation mollit aute deserunt. Magna aute quis occaecat incididunt deserunt tempor nostrud sint ullamco ipsum. Anim in occaecat exercitation laborum nostrud eiusmod reprehenderit ea culpa et sit. Culpa voluptate consectetur nostrud do eu fugiat excepteur officia pariatur enim duis amet.\r\n","registered":"2014-01-21T09:31:56 -01:00","latitude":-42.762739,"longitude":77.052349,"tags":["bug","new issue"]}
|
||||
{"id":39,"isActive":false,"balance":"$1,722.85","picture":"http://placehold.it/32x32","age":29,"color":"brown","name":"Golden Horton","gender":"male","email":"goldenhorton@chorizon.com","phone":"+1 (903) 426-2489","address":"191 Schenck Avenue, Mayfair, North Dakota, 5000","about":"Cillum velit aliqua velit in quis do mollit in et veniam. Nostrud proident non irure commodo. Ea culpa duis enim adipisicing do sint et est culpa reprehenderit officia laborum. Non et nostrud tempor nostrud nostrud ea duis esse laboris occaecat laborum. In eu ipsum sit tempor esse eiusmod enim aliquip aute. Officia ea anim ea ea. Consequat aute deserunt tempor nulla nisi tempor velit.\r\n","registered":"2015-08-19T02:56:41 -02:00","latitude":69.922534,"longitude":9.881433,"tags":["bug"]}
|
||||
{"id":40,"isActive":false,"balance":"$1,656.54","picture":"http://placehold.it/32x32","age":21,"color":"blue","name":"Stafford Emerson","gender":"male","email":"staffordemerson@chorizon.com","phone":"+1 (992) 455-2573","address":"523 Thornton Street, Conway, Vermont, 6331","about":"Adipisicing cupidatat elit minim elit nostrud elit non eiusmod sunt ut. Enim minim irure officia irure occaecat mollit eu nostrud eiusmod adipisicing sunt. Elit deserunt commodo minim dolor qui. Nostrud officia ex proident mollit et dolor tempor pariatur. Ex consequat tempor eiusmod irure mollit cillum laboris est veniam ea mollit deserunt. Tempor sit voluptate excepteur elit ullamco.\r\n","registered":"2019-02-16T04:07:08 -01:00","latitude":-29.143111,"longitude":-57.207703,"tags":["wontfix","good first issue","good first issue"]}
|
||||
{"id":41,"isActive":false,"balance":"$1,861.56","picture":"http://placehold.it/32x32","age":21,"color":"brown","name":"Salinas Gamble","gender":"male","email":"salinasgamble@chorizon.com","phone":"+1 (901) 525-2373","address":"991 Nostrand Avenue, Kansas, Mississippi, 6756","about":"Consequat tempor adipisicing cupidatat aliquip. Mollit proident incididunt ad ipsum laborum. Dolor in elit minim aliquip aliquip voluptate reprehenderit mollit eiusmod excepteur aliquip minim nulla cupidatat.\r\n","registered":"2017-08-21T05:47:53 -02:00","latitude":-22.593819,"longitude":-63.613004,"tags":["good first issue","bug","bug","wontfix"]}
|
||||
{"id":42,"isActive":true,"balance":"$3,179.74","picture":"http://placehold.it/32x32","age":34,"color":"brown","name":"Graciela Russell","gender":"female","email":"gracielarussell@chorizon.com","phone":"+1 (893) 464-3951","address":"361 Greenpoint Avenue, Shrewsbury, New Jersey, 4713","about":"Ex amet duis incididunt consequat minim dolore deserunt reprehenderit adipisicing in mollit aliqua adipisicing sunt. In ullamco eu qui est eiusmod qui. Fugiat esse est Lorem dolore nisi mollit exercitation. Aliquip occaecat esse exercitation ex non aute velit excepteur duis aliquip id. Velit id non aliquip fugiat minim qui exercitation culpa tempor consectetur. Minim dolor labore ea aute aute eu.\r\n","registered":"2015-05-18T09:52:56 -02:00","latitude":-14.634444,"longitude":12.931783,"tags":["wontfix","bug","wontfix"]}
|
||||
{"id":43,"isActive":true,"balance":"$1,777.38","picture":"http://placehold.it/32x32","age":25,"color":"blue","name":"Arnold Bender","gender":"male","email":"arnoldbender@chorizon.com","phone":"+1 (945) 581-3808","address":"781 Lorraine Street, Gallina, American Samoa, 1832","about":"Et mollit laboris duis ut duis eiusmod aute laborum duis irure labore deserunt. Ut occaecat ullamco quis excepteur. Et commodo non sint laboris tempor laboris aliqua consequat magna ea aute minim tempor pariatur. Dolore occaecat qui irure Lorem nulla consequat non.\r\n","registered":"2018-12-23T02:26:30 -01:00","latitude":41.208579,"longitude":51.948925,"tags":["bug","good first issue","good first issue","wontfix"]}
|
||||
{"id":44,"isActive":true,"balance":"$2,893.45","picture":"http://placehold.it/32x32","age":22,"color":"Green","name":"Joni Spears","gender":"female","email":"jonispears@chorizon.com","phone":"+1 (916) 565-2124","address":"307 Harwood Place, Canterwood, Maryland, 2047","about":"Dolore consequat deserunt aliquip duis consequat minim occaecat enim est. Nulla aute reprehenderit est enim duis cillum ullamco aliquip eiusmod sunt. Labore eiusmod aliqua Lorem velit aliqua quis ex mollit mollit duis culpa et qui in. Cupidatat est id ullamco irure dolor nulla.\r\n","registered":"2015-03-01T12:38:28 -01:00","latitude":8.19071,"longitude":146.323808,"tags":["wontfix","new issue","good first issue","good first issue"]}
|
||||
{"id":45,"isActive":true,"balance":"$2,830.36","picture":"http://placehold.it/32x32","age":20,"color":"brown","name":"Irene Bennett","gender":"female","email":"irenebennett@chorizon.com","phone":"+1 (904) 431-2211","address":"353 Ridgecrest Terrace, Springdale, Marshall Islands, 2686","about":"Consectetur Lorem dolor reprehenderit sunt duis. Pariatur non velit velit veniam elit reprehenderit in. Aute quis Lorem quis pariatur Lorem incididunt nulla magna adipisicing. Et id occaecat labore officia occaecat occaecat adipisicing.\r\n","registered":"2018-04-17T05:18:51 -02:00","latitude":-36.435177,"longitude":-127.552573,"tags":["bug","wontfix"]}
|
||||
{"id":46,"isActive":true,"balance":"$1,348.04","picture":"http://placehold.it/32x32","age":34,"color":"Green","name":"Lawson Curtis","gender":"male","email":"lawsoncurtis@chorizon.com","phone":"+1 (896) 532-2172","address":"942 Gerritsen Avenue, Southmont, Kansas, 8915","about":"Amet consectetur minim aute nostrud excepteur sint labore in culpa. Mollit qui quis ea amet sint ex incididunt nulla. Elit id esse ea consectetur laborum consequat occaecat aute consectetur ex. Commodo duis aute elit occaecat cupidatat non consequat ad officia qui dolore nostrud reprehenderit. Occaecat velit velit adipisicing exercitation consectetur. Incididunt et amet nostrud tempor do esse ullamco est Lorem irure. Eu aliqua eu exercitation sint.\r\n","registered":"2016-08-23T01:41:09 -02:00","latitude":-48.783539,"longitude":20.492944,"tags":[]}
|
||||
{"id":47,"isActive":true,"balance":"$1,132.41","picture":"http://placehold.it/32x32","age":38,"color":"Green","name":"Goff May","gender":"male","email":"goffmay@chorizon.com","phone":"+1 (859) 453-3415","address":"225 Rutledge Street, Boonville, Massachusetts, 4081","about":"Sint occaecat velit anim sint reprehenderit est. Adipisicing ea pariatur amet id non ex. Aute id laborum tempor aliquip magna ex eu incididunt aliquip eiusmod elit quis dolor. Anim est minim deserunt amet exercitation nulla elit nulla nulla culpa ullamco. Velit consectetur ipsum amet proident labore excepteur ut id excepteur voluptate commodo. Exercitation et laboris labore esse est laboris consectetur et sint.\r\n","registered":"2014-10-25T07:32:30 -02:00","latitude":13.079225,"longitude":76.215086,"tags":["bug"]}
|
||||
{"id":48,"isActive":true,"balance":"$1,201.87","picture":"http://placehold.it/32x32","age":38,"color":"Green","name":"Goodman Becker","gender":"male","email":"goodmanbecker@chorizon.com","phone":"+1 (825) 470-3437","address":"388 Seigel Street, Sisquoc, Kentucky, 8231","about":"Velit excepteur aute esse fugiat laboris aliqua magna. Est ex sit do labore ullamco aliquip. Duis ea commodo nostrud in fugiat. Aliqua consequat mollit dolore excepteur nisi ullamco commodo ea nostrud ea minim. Minim occaecat ut laboris ea consectetur veniam ipsum qui sit tempor incididunt anim amet eu. Velit sint incididunt eu adipisicing ipsum qui labore. Anim commodo labore reprehenderit aliquip labore elit minim deserunt amet exercitation officia non ea consectetur.\r\n","registered":"2019-09-05T04:49:03 -02:00","latitude":-23.792094,"longitude":-13.621221,"tags":["bug","bug","wontfix","new issue"]}
|
||||
{"id":49,"isActive":true,"balance":"$1,476.39","picture":"http://placehold.it/32x32","age":28,"color":"brown","name":"Maureen Dale","gender":"female","email":"maureendale@chorizon.com","phone":"+1 (984) 538-3684","address":"817 Newton Street, Bannock, Wyoming, 1468","about":"Tempor mollit exercitation excepteur cupidatat reprehenderit ad ex. Nulla laborum proident incididunt quis. Esse laborum deserunt qui anim. Sunt incididunt pariatur cillum anim proident eu ullamco dolor excepteur. Ullamco amet culpa nostrud adipisicing duis aliqua consequat duis non eu id mollit velit. Deserunt ullamco amet in occaecat.\r\n","registered":"2018-04-26T06:04:40 -02:00","latitude":-64.196802,"longitude":-117.396238,"tags":["wontfix"]}
|
||||
{"id":50,"isActive":true,"balance":"$1,947.08","picture":"http://placehold.it/32x32","age":21,"color":"Green","name":"Guerra Mcintyre","gender":"male","email":"guerramcintyre@chorizon.com","phone":"+1 (951) 536-2043","address":"423 Lombardy Street, Stewart, West Virginia, 908","about":"Sunt proident proident deserunt exercitation consectetur deserunt labore non commodo amet. Duis aute aliqua amet deserunt consectetur velit. Quis Lorem dolore occaecat deserunt reprehenderit non esse ullamco nostrud enim sunt ea fugiat. Elit amet veniam eu magna tempor. Mollit cupidatat laboris ex deserunt et labore sit tempor nostrud anim. Tempor aliqua occaecat voluptate reprehenderit eiusmod aliqua incididunt officia.\r\n","registered":"2015-07-16T05:11:42 -02:00","latitude":79.733743,"longitude":-20.602356,"tags":["bug","good first issue","good first issue"]}
|
||||
{"id":51,"isActive":true,"balance":"$2,960.90","picture":"http://placehold.it/32x32","age":23,"color":"blue","name":"Key Cervantes","gender":"male","email":"keycervantes@chorizon.com","phone":"+1 (931) 474-3865","address":"410 Barbey Street, Vernon, Oregon, 2328","about":"Duis amet minim eu consectetur laborum ad exercitation eiusmod nulla velit cillum consectetur. Nostrud aliqua cillum minim veniam quis do cupidatat mollit laborum. Culpa fugiat consectetur cillum non occaecat tempor non fugiat esse pariatur in ullamco. Occaecat amet officia et culpa officia deserunt in qui magna aute consequat eiusmod.\r\n","registered":"2019-12-15T12:13:35 -01:00","latitude":47.627647,"longitude":117.049918,"tags":["new issue"]}
|
||||
{"id":52,"isActive":false,"balance":"$1,884.02","picture":"http://placehold.it/32x32","age":35,"color":"blue","name":"Karen Nelson","gender":"female","email":"karennelson@chorizon.com","phone":"+1 (993) 528-3607","address":"930 Frank Court, Dunbar, New York, 8810","about":"Occaecat officia veniam consectetur aliqua laboris dolor irure nulla. Lorem ipsum sit nisi veniam mollit ea sint nisi irure. Eiusmod officia do laboris nostrud enim ullamco nulla officia in Lorem qui. Sint sunt incididunt quis reprehenderit incididunt. Sit dolore nulla consequat ea magna.\r\n","registered":"2014-06-23T09:21:44 -02:00","latitude":-59.059033,"longitude":76.565373,"tags":["new issue","bug"]}
|
||||
{"id":53,"isActive":true,"balance":"$3,559.55","picture":"http://placehold.it/32x32","age":32,"color":"brown","name":"Caitlin Burnett","gender":"female","email":"caitlinburnett@chorizon.com","phone":"+1 (945) 480-2796","address":"516 Senator Street, Emory, Iowa, 4145","about":"In aliqua ea esse in. Magna aute cupidatat culpa enim proident ad adipisicing laborum consequat exercitation nisi. Qui esse aliqua duis anim nulla esse enim nostrud ipsum tempor. Lorem deserunt ullamco do mollit culpa ipsum duis Lorem velit duis occaecat.\r\n","registered":"2019-01-09T02:26:31 -01:00","latitude":-82.774237,"longitude":42.316194,"tags":["bug","good first issue"]}
|
||||
{"id":54,"isActive":true,"balance":"$2,113.29","picture":"http://placehold.it/32x32","age":28,"color":"Green","name":"Richards Walls","gender":"male","email":"richardswalls@chorizon.com","phone":"+1 (865) 517-2982","address":"959 Brightwater Avenue, Stevens, Nevada, 2968","about":"Ad aute Lorem non pariatur anim ullamco ad amet eiusmod tempor velit. Mollit et tempor nisi aute adipisicing exercitation mollit do amet amet est fugiat enim. Ex voluptate nulla id tempor officia ullamco cillum dolor irure irure mollit et magna nisi. Pariatur voluptate qui laboris dolor id. Eu ipsum nulla dolore aute voluptate deserunt anim aliqua. Ut enim enim velit officia est nisi. Duis amet ut veniam aliquip minim tempor Lorem amet Lorem dolor duis.\r\n","registered":"2014-09-25T06:51:22 -02:00","latitude":80.09202,"longitude":87.49759,"tags":["wontfix","wontfix","bug"]}
|
||||
{"id":55,"isActive":true,"balance":"$1,977.66","picture":"http://placehold.it/32x32","age":36,"color":"brown","name":"Combs Stanley","gender":"male","email":"combsstanley@chorizon.com","phone":"+1 (827) 419-2053","address":"153 Beverley Road, Siglerville, South Carolina, 3666","about":"Commodo ullamco consequat eu ipsum eiusmod aute voluptate in. Ea laboris id deserunt nostrud pariatur et laboris minim tempor quis qui consequat non esse. Magna elit commodo mollit veniam Lorem enim nisi pariatur. Nisi non nisi adipisicing ea ipsum laborum dolore cillum. Amet do nisi esse laboris ipsum proident non veniam ullamco ea cupidatat sunt. Aliquip aute cillum quis laboris consectetur enim eiusmod nisi non id ullamco cupidatat sunt.\r\n","registered":"2019-08-22T07:53:15 -02:00","latitude":78.386181,"longitude":143.661058,"tags":[]}
|
||||
{"id":56,"isActive":false,"balance":"$3,886.12","picture":"http://placehold.it/32x32","age":23,"color":"brown","name":"Tucker Barry","gender":"male","email":"tuckerbarry@chorizon.com","phone":"+1 (808) 544-3433","address":"805 Jamaica Avenue, Cornfields, Minnesota, 3689","about":"Enim est sunt ullamco nulla aliqua commodo. Enim minim veniam non fugiat id tempor ad velit quis velit ad sunt consectetur laborum. Cillum deserunt tempor est adipisicing Lorem esse qui. Magna quis sunt cillum ea officia adipisicing eiusmod eu et nisi consectetur.\r\n","registered":"2016-08-29T07:28:00 -02:00","latitude":71.701551,"longitude":9.903068,"tags":[]}
|
||||
{"id":57,"isActive":false,"balance":"$1,844.56","picture":"http://placehold.it/32x32","age":20,"color":"Green","name":"Kaitlin Conner","gender":"female","email":"kaitlinconner@chorizon.com","phone":"+1 (862) 467-2666","address":"501 Knight Court, Joppa, Rhode Island, 274","about":"Occaecat id reprehenderit pariatur ea. Incididunt laborum reprehenderit ipsum velit labore excepteur nostrud voluptate officia ut culpa. Sint sunt in qui duis cillum aliqua do ullamco. Non do aute excepteur non labore sint consectetur tempor ad ea fugiat commodo labore. Dolor tempor culpa Lorem voluptate esse nostrud anim tempor irure reprehenderit. Deserunt ipsum cillum fugiat ut labore labore anim. In aliqua sunt dolore irure reprehenderit voluptate commodo consequat mollit amet laboris sit anim.\r\n","registered":"2019-05-30T06:38:24 -02:00","latitude":15.613464,"longitude":171.965629,"tags":[]}
|
||||
{"id":58,"isActive":true,"balance":"$2,876.10","picture":"http://placehold.it/32x32","age":38,"color":"Green","name":"Mamie Fischer","gender":"female","email":"mamiefischer@chorizon.com","phone":"+1 (948) 545-3901","address":"599 Hunterfly Place, Haena, Georgia, 6005","about":"Cillum eu aliquip ipsum anim in dolore labore ea. Laboris velit esse ea ea aute do adipisicing ullamco elit laborum aute tempor. Esse consectetur quis irure occaecat nisi cillum et consectetur cillum cillum quis quis commodo.\r\n","registered":"2019-05-27T05:07:10 -02:00","latitude":70.915079,"longitude":-48.813584,"tags":["bug","wontfix","wontfix","good first issue"]}
|
||||
{"id":59,"isActive":true,"balance":"$1,921.58","picture":"http://placehold.it/32x32","age":31,"color":"Green","name":"Harper Carson","gender":"male","email":"harpercarson@chorizon.com","phone":"+1 (912) 430-3243","address":"883 Dennett Place, Knowlton, New Mexico, 9219","about":"Exercitation minim esse proident cillum velit et deserunt incididunt adipisicing minim. Cillum Lorem consectetur laborum id consequat exercitation velit. Magna dolor excepteur sunt deserunt dolor ullamco non sint proident ipsum. Reprehenderit voluptate sit veniam consectetur ea sunt duis labore deserunt ipsum aute. Eiusmod aliqua anim voluptate id duis tempor aliqua commodo sunt. Do officia ea consectetur nostrud eiusmod laborum.\r\n","registered":"2019-12-07T07:33:15 -01:00","latitude":-60.812605,"longitude":-27.129016,"tags":["bug","new issue"]}
|
||||
{"id":60,"isActive":true,"balance":"$1,770.93","picture":"http://placehold.it/32x32","age":23,"color":"brown","name":"Jody Herrera","gender":"female","email":"jodyherrera@chorizon.com","phone":"+1 (890) 583-3222","address":"261 Jay Street, Strykersville, Ohio, 9248","about":"Sit adipisicing pariatur irure non sint cupidatat ex ipsum pariatur exercitation ea. Enim consequat enim eu eu sint eu elit ex esse aliquip. Pariatur ipsum dolore veniam nisi id tempor elit exercitation dolore ad fugiat labore velit.\r\n","registered":"2016-05-21T01:00:02 -02:00","latitude":-36.846586,"longitude":131.156223,"tags":[]}
|
||||
{"id":61,"isActive":false,"balance":"$2,813.41","picture":"http://placehold.it/32x32","age":37,"color":"Green","name":"Charles Castillo","gender":"male","email":"charlescastillo@chorizon.com","phone":"+1 (934) 467-2108","address":"675 Morton Street, Rew, Pennsylvania, 137","about":"Velit amet laborum amet sunt sint sit cupidatat deserunt dolor laborum consectetur veniam. Minim cupidatat amet exercitation nostrud ex deserunt ad Lorem amet aute consectetur labore reprehenderit. Minim mollit aliqua et deserunt ex nisi. Id irure dolor labore consequat ipsum consectetur.\r\n","registered":"2019-06-10T02:54:22 -02:00","latitude":-16.423202,"longitude":-146.293752,"tags":["new issue","new issue"]}
|
||||
{"id":62,"isActive":true,"balance":"$3,341.35","picture":"http://placehold.it/32x32","age":33,"color":"blue","name":"Estelle Ramirez","gender":"female","email":"estelleramirez@chorizon.com","phone":"+1 (816) 459-2073","address":"636 Nolans Lane, Camptown, California, 7794","about":"Dolor proident incididunt ex labore quis ullamco duis. Sit esse laboris nisi eu voluptate nulla cupidatat nulla fugiat veniam. Culpa cillum est esse dolor consequat. Pariatur ex sit irure qui do fugiat. Fugiat culpa veniam est nisi excepteur quis cupidatat et minim in esse minim dolor et. Anim aliquip labore dolor occaecat nisi sunt dolore pariatur veniam nostrud est ut.\r\n","registered":"2015-02-14T01:05:50 -01:00","latitude":-46.591249,"longitude":-83.385587,"tags":["good first issue","bug"]}
|
||||
{"id":63,"isActive":true,"balance":"$2,478.30","picture":"http://placehold.it/32x32","age":21,"color":"blue","name":"Knowles Hebert","gender":"male","email":"knowleshebert@chorizon.com","phone":"+1 (819) 409-2308","address":"361 Kathleen Court, Gratton, Connecticut, 7254","about":"Esse mollit nulla eiusmod esse duis non proident excepteur labore. Nisi ex culpa do mollit dolor ea deserunt elit anim ipsum nostrud. Cupidatat nostrud duis ipsum dolore amet et. Veniam in cillum ea cillum deserunt excepteur officia laboris nulla. Commodo incididunt aliquip qui sunt dolore occaecat labore do laborum irure. Labore culpa duis pariatur reprehenderit ad laboris occaecat anim cillum et fugiat ea.\r\n","registered":"2016-03-08T08:34:52 -01:00","latitude":71.042482,"longitude":152.460406,"tags":["good first issue","wontfix"]}
|
||||
{"id":64,"isActive":false,"balance":"$2,559.09","picture":"http://placehold.it/32x32","age":28,"color":"brown","name":"Thelma Mckenzie","gender":"female","email":"thelmamckenzie@chorizon.com","phone":"+1 (941) 596-2777","address":"202 Leonard Street, Riverton, Illinois, 8577","about":"Non ad ipsum elit commodo fugiat Lorem ipsum reprehenderit. Commodo incididunt officia cillum eiusmod officia proident ea incididunt ullamco magna commodo consectetur dolor. Nostrud esse nisi ea laboris. Veniam et dolore nulla excepteur pariatur laborum non. Eiusmod reprehenderit do tempor esse eu eu aliquip. Magna quis consectetur ipsum adipisicing mollit elit ad elit.\r\n","registered":"2020-04-14T12:43:06 -02:00","latitude":16.026129,"longitude":105.464476,"tags":[]}
|
||||
{"id":65,"isActive":true,"balance":"$1,025.08","picture":"http://placehold.it/32x32","age":34,"color":"blue","name":"Carole Rowland","gender":"female","email":"carolerowland@chorizon.com","phone":"+1 (862) 558-3448","address":"941 Melba Court, Bluetown, Florida, 9555","about":"Ullamco occaecat ipsum aliqua sit proident eu. Occaecat ut consectetur proident culpa aliqua excepteur quis qui anim irure sit proident mollit irure. Proident cupidatat deserunt dolor adipisicing.\r\n","registered":"2014-12-01T05:55:35 -01:00","latitude":-0.191998,"longitude":43.389652,"tags":["wontfix"]}
|
||||
{"id":66,"isActive":true,"balance":"$1,061.49","picture":"http://placehold.it/32x32","age":35,"color":"brown","name":"Higgins Aguilar","gender":"male","email":"higginsaguilar@chorizon.com","phone":"+1 (911) 540-3791","address":"132 Sackman Street, Layhill, Guam, 8729","about":"Anim ea dolore exercitation minim. Proident cillum non deserunt cupidatat veniam non occaecat aute ullamco irure velit laboris ex aliquip. Voluptate incididunt non ex nulla est ipsum. Amet anim do velit sunt irure sint minim nisi occaecat proident tempor elit exercitation nostrud.\r\n","registered":"2015-04-05T02:10:07 -02:00","latitude":74.702813,"longitude":151.314972,"tags":["bug"]}
|
||||
{"id":67,"isActive":true,"balance":"$3,510.14","picture":"http://placehold.it/32x32","age":28,"color":"brown","name":"Ilene Gillespie","gender":"female","email":"ilenegillespie@chorizon.com","phone":"+1 (937) 575-2676","address":"835 Lake Street, Naomi, Alabama, 4131","about":"Quis laborum consequat id cupidatat exercitation aute ad ex nulla dolore velit qui proident minim. Et do consequat nisi eiusmod exercitation exercitation enim voluptate elit ullamco. Cupidatat ut adipisicing consequat aute est voluptate sit ipsum culpa ullamco. Ex pariatur ex qui quis qui.\r\n","registered":"2015-06-28T09:41:45 -02:00","latitude":71.573342,"longitude":-95.295989,"tags":["wontfix","wontfix"]}
|
||||
{"id":68,"isActive":false,"balance":"$1,539.98","picture":"http://placehold.it/32x32","age":24,"color":"Green","name":"Angelina Dyer","gender":"female","email":"angelinadyer@chorizon.com","phone":"+1 (948) 574-3949","address":"575 Division Place, Gorham, Louisiana, 3458","about":"Cillum magna eu est veniam incididunt laboris laborum elit mollit incididunt proident non mollit. Dolor mollit culpa ullamco dolore aliqua adipisicing culpa officia. Reprehenderit minim nisi fugiat consectetur dolore.\r\n","registered":"2014-07-08T06:34:36 -02:00","latitude":-85.649593,"longitude":66.126018,"tags":["good first issue"]}
|
||||
{"id":69,"isActive":true,"balance":"$3,367.69","picture":"http://placehold.it/32x32","age":30,"color":"brown","name":"Marks Burt","gender":"male","email":"marksburt@chorizon.com","phone":"+1 (895) 497-3138","address":"819 Village Road, Wadsworth, Delaware, 6099","about":"Fugiat tempor aute voluptate proident exercitation tempor esse dolor id. Duis aliquip exercitation Lorem elit magna sint sit. Culpa adipisicing occaecat aliqua officia reprehenderit laboris sint aliquip. Magna do sunt consequat excepteur nisi do commodo non. Cillum officia nostrud consequat excepteur elit proident in. Tempor ipsum in ut qui cupidatat exercitation est nulla exercitation voluptate.\r\n","registered":"2014-08-31T06:12:18 -02:00","latitude":26.854112,"longitude":-143.313948,"tags":["good first issue"]}
|
||||
{"id":70,"isActive":false,"balance":"$3,755.72","picture":"http://placehold.it/32x32","age":23,"color":"blue","name":"Glass Perkins","gender":"male","email":"glassperkins@chorizon.com","phone":"+1 (923) 486-3725","address":"899 Roosevelt Court, Belleview, Idaho, 1737","about":"Esse magna id labore sunt qui eu enim esse cillum consequat enim eu culpa enim. Duis veniam cupidatat deserunt sunt irure ad Lorem proident aliqua mollit. Laborum mollit aute nulla est. Sunt id proident incididunt ipsum et dolor consectetur laborum enim dolor officia dolore laborum. Est commodo duis et ea consequat labore id id eu aliqua. Qui veniam sit eu aliquip ad sit dolor ullamco et laborum voluptate quis fugiat ex. Exercitation dolore cillum amet ad nisi consectetur occaecat sit aliqua laborum qui proident aliqua exercitation.\r\n","registered":"2015-05-22T05:44:33 -02:00","latitude":54.27147,"longitude":-65.065604,"tags":["wontfix"]}
|
||||
{"id":71,"isActive":true,"balance":"$3,381.63","picture":"http://placehold.it/32x32","age":38,"color":"Green","name":"Candace Sawyer","gender":"female","email":"candacesawyer@chorizon.com","phone":"+1 (830) 404-2636","address":"334 Arkansas Drive, Bordelonville, Tennessee, 8449","about":"Et aliqua elit incididunt et aliqua. Deserunt ut elit proident ullamco ut. Ex exercitation amet non eu reprehenderit ea voluptate qui sit reprehenderit ad sint excepteur.\r\n","registered":"2014-04-04T08:45:00 -02:00","latitude":6.484262,"longitude":-37.054928,"tags":["new issue","new issue"]}
|
||||
{"id":72,"isActive":true,"balance":"$1,640.98","picture":"http://placehold.it/32x32","age":27,"color":"Green","name":"Hendricks Martinez","gender":"male","email":"hendricksmartinez@chorizon.com","phone":"+1 (857) 566-3245","address":"636 Agate Court, Newry, Utah, 3304","about":"Do sit culpa amet incididunt officia enim occaecat incididunt excepteur enim tempor deserunt qui. Excepteur adipisicing anim consectetur adipisicing proident anim laborum qui. Aliquip nostrud cupidatat sit ullamco.\r\n","registered":"2018-06-15T10:36:11 -02:00","latitude":86.746034,"longitude":10.347893,"tags":["new issue"]}
|
||||
{"id":73,"isActive":false,"balance":"$1,239.74","picture":"http://placehold.it/32x32","age":38,"color":"blue","name":"Eleanor Shepherd","gender":"female","email":"eleanorshepherd@chorizon.com","phone":"+1 (894) 567-2617","address":"670 Lafayette Walk, Darlington, Palau, 8803","about":"Adipisicing ad incididunt id veniam magna cupidatat et labore eu deserunt mollit. Lorem voluptate exercitation elit eu aliquip cupidatat occaecat anim excepteur reprehenderit est est. Ipsum excepteur ea mollit qui nisi laboris ex qui. Cillum velit culpa culpa commodo laboris nisi Lorem non elit deserunt incididunt. Officia quis velit nulla sint incididunt duis mollit tempor adipisicing qui officia eu nisi Lorem. Do proident pariatur ex enim nostrud eu aute esse deserunt eu velit quis culpa exercitation. Occaecat ad cupidatat ullamco consequat duis anim deserunt occaecat aliqua sunt consectetur ipsum magna.\r\n","registered":"2020-02-29T12:15:28 -01:00","latitude":35.749621,"longitude":-94.40842,"tags":["good first issue","new issue","new issue","bug"]}
|
||||
{"id":74,"isActive":true,"balance":"$1,180.90","picture":"http://placehold.it/32x32","age":36,"color":"Green","name":"Stark Wong","gender":"male","email":"starkwong@chorizon.com","phone":"+1 (805) 575-3055","address":"522 Bond Street, Bawcomville, Wisconsin, 324","about":"Aute qui sit incididunt eu adipisicing exercitation sunt nostrud. Id laborum incididunt proident ipsum est cillum esse. Officia ullamco eu ut Lorem do minim ea dolor consequat sit eu est voluptate. Id commodo cillum enim culpa aliquip ullamco nisi Lorem cillum ipsum cupidatat anim officia eu. Dolore sint elit labore pariatur. Officia duis nulla voluptate et nulla ut voluptate laboris eu commodo veniam qui veniam.\r\n","registered":"2020-01-25T10:47:48 -01:00","latitude":-80.452139,"longitude":160.72546,"tags":["wontfix"]}
|
||||
{"id":75,"isActive":false,"balance":"$1,913.42","picture":"http://placehold.it/32x32","age":24,"color":"Green","name":"Emma Jacobs","gender":"female","email":"emmajacobs@chorizon.com","phone":"+1 (899) 554-3847","address":"173 Tapscott Street, Esmont, Maine, 7450","about":"Laboris consequat consectetur tempor labore ullamco ullamco voluptate quis quis duis ut ad. In est irure quis amet sunt nulla ad ut sit labore ut eu quis duis. Nostrud cupidatat aliqua sunt occaecat minim id consequat officia deserunt laborum. Ea dolor reprehenderit laborum veniam exercitation est nostrud excepteur laborum minim id qui et.\r\n","registered":"2019-03-29T06:24:13 -01:00","latitude":-35.53722,"longitude":155.703874,"tags":[]}
|
||||
{"id":77,"isActive":false,"balance":"$1,274.29","picture":"http://placehold.it/32x32","age":25,"color":"Red","name":"孫武","gender":"male","email":"SunTzu@chorizon.com","phone":"+1 (810) 407-3258","address":"吴國","about":"孫武(前544年-前470年或前496年),字長卿,春秋時期齊國人,著名軍事家、政治家,兵家代表人物。兵書《孫子兵法》的作者,後人尊稱為孫子、兵聖、東方兵聖,山東、蘇州等地尚有祀奉孫武的廟宇兵聖廟。其族人为樂安孫氏始祖,次子孙明为富春孫氏始祖。\r\n","registered":"2014-10-20T10:13:32 -02:00","latitude":17.11935,"longitude":65.38197,"tags":["new issue","wontfix"]}
|
@ -1,59 +0,0 @@
|
||||
{
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"wordsPosition",
|
||||
"exactness"
|
||||
],
|
||||
"distinctAttribute": "email",
|
||||
"searchableAttributes": [
|
||||
"balance",
|
||||
"picture",
|
||||
"age",
|
||||
"color",
|
||||
"name",
|
||||
"gender",
|
||||
"email",
|
||||
"phone",
|
||||
"address",
|
||||
"about",
|
||||
"registered",
|
||||
"latitude",
|
||||
"longitude",
|
||||
"tags"
|
||||
],
|
||||
"displayedAttributes": [
|
||||
"id",
|
||||
"isActive",
|
||||
"balance",
|
||||
"picture",
|
||||
"age",
|
||||
"color",
|
||||
"name",
|
||||
"gender",
|
||||
"email",
|
||||
"phone",
|
||||
"address",
|
||||
"about",
|
||||
"registered",
|
||||
"latitude",
|
||||
"longitude",
|
||||
"tags"
|
||||
],
|
||||
"stopWords": [
|
||||
"in",
|
||||
"ad"
|
||||
],
|
||||
"synonyms": {
|
||||
"wolverine": ["xmen", "logan"],
|
||||
"logan": ["wolverine", "xmen"]
|
||||
},
|
||||
"attributesForFaceting": [
|
||||
"gender",
|
||||
"color",
|
||||
"tags",
|
||||
"isActive"
|
||||
]
|
||||
}
|
@ -1,3 +0,0 @@
|
||||
{"status":"processed","updateId":0,"type":{"name":"Settings","settings":{"ranking_rules":{"Update":["Typo","Words","Proximity","Attribute","WordsPosition","Exactness"]},"distinct_attribute":"Nothing","primary_key":"Nothing","searchable_attributes":"Nothing","displayed_attributes":"Nothing","stop_words":"Nothing","synonyms":"Nothing","attributes_for_faceting":"Nothing"}}}
|
||||
{"status":"processed","updateId":1,"type":{"name":"DocumentsAddition","number":77}}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user