Remove the useless cli subcrate

This commit is contained in:
Kerollmops 2023-01-16 17:08:43 +01:00
parent e68758cec4
commit 03a82136dc
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
4 changed files with 7 additions and 935 deletions

357
Cargo.lock generated
View File

@ -289,30 +289,12 @@ dependencies = [
"alloc-no-stdlib",
]
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "anes"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
name = "ansi_term"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [
"winapi",
]
[[package]]
name = "anyhow"
version = "1.0.68"
@ -697,20 +679,6 @@ dependencies = [
"whatlang",
]
[[package]]
name = "chrono"
version = "0.4.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f"
dependencies = [
"iana-time-zone",
"js-sys",
"num-integer",
"num-traits",
"wasm-bindgen",
"winapi",
]
[[package]]
name = "ciborium"
version = "0.2.0"
@ -747,21 +715,6 @@ dependencies = [
"generic-array",
]
[[package]]
name = "clap"
version = "2.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
dependencies = [
"ansi_term",
"atty",
"bitflags",
"strsim 0.8.0",
"textwrap 0.11.0",
"unicode-width",
"vec_map",
]
[[package]]
name = "clap"
version = "3.2.23"
@ -774,9 +727,9 @@ dependencies = [
"clap_lex 0.2.4",
"indexmap",
"once_cell",
"strsim 0.10.0",
"strsim",
"termcolor",
"textwrap 0.16.0",
"textwrap",
]
[[package]]
@ -790,7 +743,7 @@ dependencies = [
"clap_lex 0.3.0",
"is-terminal",
"once_cell",
"strsim 0.10.0",
"strsim",
"termcolor",
]
@ -800,7 +753,7 @@ version = "3.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65"
dependencies = [
"heck 0.4.0",
"heck",
"proc-macro-error",
"proc-macro2 1.0.49",
"quote 1.0.23",
@ -813,7 +766,7 @@ version = "4.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0177313f9f02afc995627906bbd8967e2be069f5261954222dac78290c2b9014"
dependencies = [
"heck 0.4.0",
"heck",
"proc-macro-error",
"proc-macro2 1.0.49",
"quote 1.0.23",
@ -838,61 +791,6 @@ dependencies = [
"os_str_bytes",
]
[[package]]
name = "cli"
version = "1.0.0"
dependencies = [
"bimap",
"byte-unit",
"color-eyre",
"csv",
"eyre",
"indicatif",
"milli 1.0.0",
"mimalloc",
"serde",
"serde_json",
"stderrlog",
"structopt",
]
[[package]]
name = "codespan-reporting"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e"
dependencies = [
"termcolor",
"unicode-width",
]
[[package]]
name = "color-eyre"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a667583cca8c4f8436db8de46ea8233c42a7d9ae424a82d338f2e4675229204"
dependencies = [
"backtrace",
"color-spantrace",
"eyre",
"indenter",
"once_cell",
"owo-colors",
"tracing-error",
]
[[package]]
name = "color-spantrace"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ba75b3d9449ecdccb27ecbc479fdc0b87fa2dd43d2f8298f9bf0e59aacc8dce"
dependencies = [
"once_cell",
"owo-colors",
"tracing-core",
"tracing-error",
]
[[package]]
name = "concat-arrays"
version = "0.1.2"
@ -913,7 +811,6 @@ dependencies = [
"encode_unicode",
"lazy_static",
"libc",
"unicode-width",
"windows-sys",
]
@ -1135,50 +1032,6 @@ dependencies = [
"memchr",
]
[[package]]
name = "cxx"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d1075c37807dcf850c379432f0df05ba52cc30f279c5cfc43cc221ce7f8579"
dependencies = [
"cc",
"cxxbridge-flags",
"cxxbridge-macro",
"link-cplusplus",
]
[[package]]
name = "cxx-build"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5044281f61b27bc598f2f6647d480aed48d2bf52d6eb0b627d84c0361b17aa70"
dependencies = [
"cc",
"codespan-reporting",
"once_cell",
"proc-macro2 1.0.49",
"quote 1.0.23",
"scratch",
"syn 1.0.107",
]
[[package]]
name = "cxxbridge-flags"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61b50bc93ba22c27b0d31128d2d130a0a6b3d267ae27ef7e4fae2167dfe8781c"
[[package]]
name = "cxxbridge-macro"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39e61fda7e62115119469c7b3591fd913ecca96fb766cfd3f2e2502ab7bc87a5"
dependencies = [
"proc-macro2 1.0.49",
"quote 1.0.23",
"syn 1.0.107",
]
[[package]]
name = "darling"
version = "0.14.2"
@ -1199,7 +1052,7 @@ dependencies = [
"ident_case",
"proc-macro2 1.0.49",
"quote 1.0.23",
"strsim 0.10.0",
"strsim",
"syn 1.0.107",
]
@ -1495,16 +1348,6 @@ dependencies = [
"libc",
]
[[package]]
name = "eyre"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c2b6b5a29c02cdc822728b7d7b8ae1bab3e3b05d44522770ddd49722eeac7eb"
dependencies = [
"indenter",
"once_cell",
]
[[package]]
name = "fastrand"
version = "1.8.0"
@ -1907,15 +1750,6 @@ dependencies = [
"stable_deref_trait",
]
[[package]]
name = "heck"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "heck"
version = "0.4.0"
@ -2066,30 +1900,6 @@ dependencies = [
"tokio-rustls",
]
[[package]]
name = "iana-time-zone"
version = "0.1.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
"winapi",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca"
dependencies = [
"cxx",
"cxx-build",
]
[[package]]
name = "ident_case"
version = "1.0.1"
@ -2106,12 +1916,6 @@ dependencies = [
"unicode-normalization",
]
[[package]]
name = "indenter"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683"
[[package]]
name = "index-scheduler"
version = "1.0.0"
@ -2152,18 +1956,6 @@ dependencies = [
"serde",
]
[[package]]
name = "indicatif"
version = "0.17.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cef509aa9bc73864d6756f0d34d35504af3cf0844373afe9b8669a5b8005a729"
dependencies = [
"console",
"number_prefix",
"portable-atomic",
"unicode-width",
]
[[package]]
name = "insta"
version = "1.24.1"
@ -2544,15 +2336,6 @@ dependencies = [
"yada",
]
[[package]]
name = "link-cplusplus"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5"
dependencies = [
"cc",
]
[[package]]
name = "linked-hash-map"
version = "0.5.6"
@ -3071,12 +2854,6 @@ dependencies = [
"libc",
]
[[package]]
name = "number_prefix"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]]
name = "object"
version = "0.27.1"
@ -3140,12 +2917,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "owo-colors"
version = "3.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f"
[[package]]
name = "page_size"
version = "0.4.2"
@ -3399,12 +3170,6 @@ dependencies = [
"plotters-backend",
]
[[package]]
name = "portable-atomic"
version = "0.3.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26f6a7b87c2e435a3241addceeeff740ff8b7e76b74c13bf9acb17fa454ea00b"
[[package]]
name = "ppv-lite86"
version = "0.2.17"
@ -3836,12 +3601,6 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "scratch"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2"
[[package]]
name = "sct"
version = "0.7.0"
@ -3958,15 +3717,6 @@ dependencies = [
"digest",
]
[[package]]
name = "sharded-slab"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31"
dependencies = [
"lazy_static",
]
[[package]]
name = "signal-hook-registry"
version = "1.4.0"
@ -4090,55 +3840,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "stderrlog"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69a26bbf6de627d389164afa9783739b56746c6c72c4ed16539f4ff54170327b"
dependencies = [
"atty",
"chrono",
"log",
"termcolor",
"thread_local",
]
[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "structopt"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10"
dependencies = [
"clap 2.34.0",
"lazy_static",
"structopt-derive",
]
[[package]]
name = "structopt-derive"
version = "0.4.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0"
dependencies = [
"heck 0.3.3",
"proc-macro-error",
"proc-macro2 1.0.49",
"quote 1.0.23",
"syn 1.0.107",
]
[[package]]
name = "subtle"
version = "2.4.1"
@ -4246,15 +3953,6 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width",
]
[[package]]
name = "textwrap"
version = "0.16.0"
@ -4281,15 +3979,6 @@ dependencies = [
"syn 1.0.107",
]
[[package]]
name = "thread_local"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180"
dependencies = [
"once_cell",
]
[[package]]
name = "time"
version = "0.3.17"
@ -4443,28 +4132,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a"
dependencies = [
"once_cell",
"valuable",
]
[[package]]
name = "tracing-error"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d686ec1c0f384b1277f097b2f279a2ecc11afe8c133c1aabf036a27cb4cd206e"
dependencies = [
"tracing",
"tracing-subscriber",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6176eae26dd70d0c919749377897b54a9276bd7061339665dd68777926b5a70"
dependencies = [
"sharded-slab",
"thread_local",
"tracing-core",
]
[[package]]
@ -4587,24 +4254,12 @@ dependencies = [
"serde",
]
[[package]]
name = "valuable"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "vec_map"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
[[package]]
name = "vergen"
version = "7.5.0"

View File

@ -13,8 +13,7 @@ members = [
"filter-parser",
"flatten-serde-json",
"json-depth-checker",
"benchmarks",
"cli"
"benchmarks"
]
[profile.release]

View File

@ -1,23 +0,0 @@
[package]
name = "cli"
version = "1.0.0"
edition = "2018"
description = "A CLI to interact with a milli index"
publish = false
[dependencies]
bimap = "0.6.2"
byte-unit = { version = "4.0.14", default-features = false, features = ["std", "serde"] }
color-eyre = "0.6.2"
csv = "1.1.6"
eyre = "0.6.8"
indicatif = "0.17.1"
milli = { path = "../milli", default-features = false }
mimalloc = { version = "0.1.29", default-features = false }
serde = "1.0.145"
serde_json = "1.0.85"
stderrlog = "0.5.3"
structopt = "0.3.26"
[features]
default = ["milli/default"]

View File

@ -1,559 +0,0 @@
use std::collections::BTreeMap;
use std::fmt::Display;
use std::fs::File;
use std::io::{stdin, BufRead, BufReader, Cursor, Read, Write};
use std::path::PathBuf;
use std::str::FromStr;
use std::time::{Duration, Instant};
use byte_unit::Byte;
use eyre::Result;
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::UpdateIndexingStep::{
ComputeIdsAndMergeDocuments, IndexDocuments, MergeDataIntoFinalDatabase, RemapDocumentAddition,
};
use milli::update::{self, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig};
use milli::{heed, CriterionImplementationStrategy, Index, Object};
use structopt::StructOpt;
// Register mimalloc's `MiMalloc` as the global allocator for this binary.
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
// Top-level command-line arguments, parsed by structopt.
//
// NOTE: plain `//` comments are used on purpose for the undocumented items:
// structopt turns `///` doc comments into help text, so adding them would
// change what the program prints for `--help`.
#[derive(Debug, StructOpt)]
#[structopt(name = "Milli CLI", about = "A simple CLI to manipulate a milli index.")]
struct Cli {
// Path handed to `milli::Index::new` in `main`; defaults to the current directory.
#[structopt(short, long, default_value = ".")]
index_path: PathBuf,
// Size passed to the heed environment's `map_size` in `main`; defaults to "100GiB".
#[structopt(short = "s", long, default_value = "100GiB")]
index_size: Byte,
/// Verbose mode (-v, -vv, -vvv, etc.)
#[structopt(short, long, parse(from_occurrences))]
verbose: usize,
// The subcommand that `main` runs against the opened index.
#[structopt(subcommand)]
subcommand: Command,
}
// Subcommands of the CLI. Each variant is dispatched to its own `Performer`
// implementation by `impl Performer for Command`.
// (Plain `//` comments: structopt would turn `///` into help text.)
#[derive(Debug, StructOpt)]
enum Command {
// Document operations; wraps the nested `Documents` subcommand.
Documents {
#[structopt(subcommand)]
cmd: Documents,
},
// Runs a search; the arguments are the fields of `Search`.
Search(Search),
// Settings operations; wraps the nested `Settings` subcommand.
Settings {
#[structopt(subcommand)]
cmd: Settings,
},
}
impl Performer for Command {
    /// Forwards the opened `index` to whichever subcommand was selected and
    /// returns that subcommand's result unchanged.
    fn perform(self, index: Index) -> Result<()> {
        match self {
            Self::Documents { cmd: documents } => documents.perform(index),
            Self::Search(search) => search.perform(index),
            Self::Settings { cmd: settings } => settings.perform(index),
        }
    }
}
// Settings-related subcommands.
// (Plain `//` comments: structopt would turn `///` into help text.)
#[derive(Debug, StructOpt)]
enum Settings {
// Apply the changes described by `SettingsUpdate`.
Update(SettingsUpdate),
// Print the current settings of the index (see `Settings::show`).
Show,
}
impl Settings {
    /// Reads the settings stored in `index` through a read transaction and
    /// prints them to stdout, one section per setting.
    ///
    /// Displayed and searchable attributes are printed as `*` when the index
    /// has no explicit list for them. Any error returned while reading the
    /// index is propagated to the caller.
    fn show(&self, index: Index) -> Result<()> {
        let rtxn = index.read_txn()?;

        let displayed: Option<Vec<String>> = index
            .displayed_fields(&rtxn)?
            .map(|fields| fields.into_iter().map(String::from).collect());
        let searchable: Option<Vec<String>> = index
            .searchable_fields(&rtxn)?
            .map(|fields| fields.into_iter().map(String::from).collect());

        let filterable: Vec<_> = index.filterable_fields(&rtxn)?.into_iter().collect();
        let sortable: Vec<_> = index.sortable_fields(&rtxn)?.into_iter().collect();
        let criteria: Vec<_> = index
            .criteria(&rtxn)?
            .into_iter()
            .map(|criterion| criterion.to_string())
            .collect();

        // No stop-word set stored in the index means nothing to list.
        let stop_words: Vec<String> = match index.stop_words(&rtxn)? {
            Some(set) => set.stream().into_strs()?,
            None => Vec::new(),
        };

        let distinct_field = index.distinct_field(&rtxn)?.map(String::from);

        // milli stores every synonym entry as a list of words: the original
        // separators are gone, so the words are glued back with single spaces.
        let stored_synonyms = index.synonyms(&rtxn)?;
        let synonyms: BTreeMap<_, Vec<_>> = stored_synonyms
            .iter()
            .map(|(words, alternatives)| {
                let joined = alternatives.iter().map(|alternative| alternative.join(" ")).collect();
                (words.join(" "), joined)
            })
            .collect();

        let exact_attributes = index.exact_attributes(&rtxn)?;

        // Render every section up front so the `println!` below stays readable.
        let displayed = displayed.unwrap_or_else(|| vec!["*".to_owned()]).join("\n\t");
        let searchable = searchable.unwrap_or_else(|| vec!["*".to_owned()]).join("\n\t");
        let filterable = filterable.join("\n\t");
        let sortable = sortable.join("\n\t");
        let criteria = criteria.join("\n\t");
        let stop_words = stop_words.join("\n\t");
        let distinct_field = distinct_field.unwrap_or_default();
        let synonyms =
            synonyms.into_iter().map(|(k, v)| format!("\n\t{}:\n{:?}", k, v)).collect::<String>();
        let exact_attributes = exact_attributes.join("\n\t");

        println!(
            "displayed attributes:\n\t{}\nsearchable attributes:\n\t{}\nfilterable attributes:\n\t{}\nsortable attributes:\n\t{}\ncriterion:\n\t{}\nstop words:\n\t{}\ndistinct fields:\n\t{}\nsynonyms:\n\t{}\nexact attributes:\n\t{}\n",
            displayed,
            searchable,
            filterable,
            sortable,
            criteria,
            stop_words,
            distinct_field,
            synonyms,
            exact_attributes,
        );

        Ok(())
    }
}
impl Performer for Settings {
    /// Runs the selected settings subcommand: either applies an update or
    /// prints the current settings of `index`.
    fn perform(self, index: Index) -> Result<()> {
        match self {
            Self::Update(changes) => changes.perform(index),
            shown @ Self::Show => shown.show(index),
        }
    }
}
// Document-related subcommands.
// (Plain `//` comments: structopt would turn `///` into help text.)
#[derive(Debug, StructOpt)]
enum Documents {
// Add documents to the index; the arguments are the fields of `DocumentAddition`.
Add(DocumentAddition),
}
impl Performer for Documents {
    /// Runs the document subcommand against `index`.
    fn perform(self, index: Index) -> Result<()> {
        // `Add` is the only variant, so the pattern is irrefutable.
        let Self::Add(addition) = self;
        addition.perform(index)
    }
}
/// An action that can be executed against an opened milli index.
trait Performer {
/// Consumes the action and runs it on `index`, returning an error if it fails.
fn perform(self, index: Index) -> Result<()>;
}
/// Installs the `color_eyre` error hooks and initializes the `stderrlog`
/// logger with the verbosity requested on the command line.
///
/// Log lines are emitted without their level and without a timestamp.
/// Fails if either the hooks or the logger cannot be installed.
fn setup(opt: &Cli) -> Result<()> {
    color_eyre::install()?;

    let mut logger = stderrlog::new();
    logger.verbosity(opt.verbose);
    logger.show_level(false);
    logger.timestamp(stderrlog::Timestamp::Off);
    logger.init()?;

    Ok(())
}
fn main() -> Result<()> {
let command = Cli::from_args();
setup(&command)?;
let mut options = heed::EnvOpenOptions::new();
options.map_size(command.index_size.get_bytes() as usize);
let index = milli::Index::new(options, command.index_path)?;
command.subcommand.perform(index)?;
Ok(())
}
/// Input formats accepted when adding documents; selects which
/// `documents_from_*` parser `DocumentAddition::perform` uses.
#[derive(Debug)]
enum DocumentAdditionFormat {
/// Parsed with `documents_from_csv`.
Csv,
/// Parsed with `documents_from_json`.
Json,
/// Parsed with `documents_from_jsonl`.
Jsonl,
}
impl FromStr for DocumentAdditionFormat {
    type Err = eyre::Error;

    /// Parses one of the exact, case-sensitive names `csv`, `jsonl` or `json`.
    ///
    /// Any other input yields an `invalid format: ...` error.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let format = match s {
            "csv" => Self::Csv,
            "jsonl" => Self::Jsonl,
            "json" => Self::Json,
            unknown => eyre::bail!("invalid format: {}", unknown),
        };
        Ok(format)
    }
}
// Arguments of the "add documents" subcommand.
// (Plain `//` comments are used for the items that had no documentation:
// structopt would turn `///` into help text and change the CLI output.)
#[derive(Debug, StructOpt)]
struct DocumentAddition {
// Format of the input documents; one of "csv", "jsonl" or "json" (default "json").
#[structopt(short, long, default_value = "json", possible_values = &["csv", "jsonl", "json"])]
format: DocumentAdditionFormat,
/// Path to the update file, if not present, will read from stdin.
#[structopt(short, long)]
path: Option<PathBuf>,
/// Specify the primary key.
#[structopt(long)]
primary: Option<String>,
/// Whether to generate missing document ids.
#[structopt(short, long)]
autogen_docids: bool,
/// Whether to update or replace the documents if they already exist.
#[structopt(short, long)]
update_documents: bool,
}
impl Performer for DocumentAddition {
fn perform(self, index: milli::Index) -> Result<()> {
let reader: Box<dyn Read> = match self.path {
Some(ref path) => {
let file = File::open(path)?;
Box::new(file)
}
None => Box::new(stdin()),
};
println!("parsing documents...");
let reader = BufReader::new(reader);
let documents = match self.format {
DocumentAdditionFormat::Csv => documents_from_csv(reader)?,
DocumentAdditionFormat::Json => documents_from_json(reader)?,
DocumentAdditionFormat::Jsonl => documents_from_jsonl(reader)?,
};
let reader = DocumentsBatchReader::from_reader(Cursor::new(documents))?;
println!("Adding {} documents to the index.", reader.documents_count());
let mut txn = index.write_txn()?;
let config = milli::update::IndexerConfig { log_every_n: Some(100), ..Default::default() };
let update_method = if self.update_documents {
IndexDocumentsMethod::UpdateDocuments
} else {
IndexDocumentsMethod::ReplaceDocuments
};
if let Some(primary) = self.primary {
let mut builder = update::Settings::new(&mut txn, &index, &config);
builder.set_primary_key(primary);
builder.execute(|_| (), || false).unwrap();
}
let indexing_config = IndexDocumentsConfig {
update_method,
autogenerate_docids: self.autogen_docids,
..Default::default()
};
let mut bars = Vec::new();
let progesses = MultiProgress::new();
for _ in 0..4 {
let bar = ProgressBar::hidden();
let bar = progesses.add(bar);
bars.push(bar);
}
let addition = milli::update::IndexDocuments::new(
&mut txn,
&index,
&config,
indexing_config,
|step| indexing_callback(step, &bars),
|| false,
)
.unwrap();
let (addition, user_error) = addition.add_documents(reader)?;
if let Err(error) = user_error {
return Err(error.into());
}
let result = addition.execute()?;
txn.commit()?;
println!("{:?}", result);
Ok(())
}
}
/// Progress callback handed to milli's indexing operations.
///
/// Picks the bar at the index reported by `step.step()`, finishes the bar of
/// the previous step if it is still running, then refreshes the selected bar
/// with the information carried by `step` and (re)enables its 200ms tick.
///
/// Panics if `bars` has no entry for the step index, or if the progress
/// template is rejected by indicatif.
fn indexing_callback(step: milli::update::UpdateIndexingStep, bars: &[ProgressBar]) {
    let current = step.step();
    let bar = &bars[current];

    // Entering a new step: close the bar of the step that came before it.
    if let Some(previous) = current.checked_sub(1).map(|index| &bars[index]) {
        if !previous.is_finished() {
            previous.disable_steady_tick();
            previous.finish();
        }
    }

    let bar_style = ProgressStyle::default_bar()
        .progress_chars("##-")
        .template("[eta: {eta_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}")
        .unwrap();

    // `None` for the spinner-only step, `Some((length, message, position))`
    // for the steps that have a known total.
    let progress = match step {
        RemapDocumentAddition { documents_seen } => {
            bar.set_style(ProgressStyle::default_spinner());
            bar.set_message(format!("remapped {} documents so far.", documents_seen));
            None
        }
        ComputeIdsAndMergeDocuments { documents_seen, total_documents } => {
            Some((total_documents as u64, "Merging documents...", documents_seen as u64))
        }
        IndexDocuments { documents_seen, total_documents } => {
            Some((total_documents as u64, "Indexing documents...", documents_seen as u64))
        }
        MergeDataIntoFinalDatabase { databases_seen, total_databases } => {
            Some((total_databases as u64, "Merging databases...", databases_seen as u64))
        }
    };

    if let Some((length, message, position)) = progress {
        bar.set_style(bar_style);
        bar.set_length(length);
        bar.set_message(message);
        bar.set_position(position);
    }

    bar.enable_steady_tick(Duration::from_millis(200));
}
/// Reads a stream of JSON objects from `reader` and appends each of them to
/// a documents batch, returning the bytes of the finished batch.
///
/// Stops at the first value that fails to deserialize or to be appended.
fn documents_from_jsonl(reader: impl Read) -> Result<Vec<u8>> {
    let mut builder = DocumentsBatchBuilder::new(Vec::new());

    let objects =
        serde_json::Deserializer::from_reader(BufReader::new(reader)).into_iter::<Object>();
    for entry in objects {
        builder.append_json_object(&entry?)?;
    }

    Ok(builder.into_inner()?)
}
/// Appends the JSON array read from `reader` to a documents batch and
/// returns the bytes of the finished batch.
fn documents_from_json(reader: impl Read) -> Result<Vec<u8>> {
    let mut builder = DocumentsBatchBuilder::new(Vec::new());
    builder.append_json_array(reader)?;
    Ok(builder.into_inner()?)
}
/// Appends the CSV data read from `reader` to a documents batch and returns
/// the bytes of the finished batch.
fn documents_from_csv(reader: impl Read) -> Result<Vec<u8>> {
    let mut builder = DocumentsBatchBuilder::new(Vec::new());
    builder.append_csv(csv::Reader::from_reader(reader))?;
    Ok(builder.into_inner()?)
}
/// Newtype around milli's `CriterionImplementationStrategy`, so that it can
/// be given local `FromStr` and `Display` implementations for the CLI.
#[derive(Debug, Clone, Copy)]
struct SearchStrategyOption(CriterionImplementationStrategy);
impl FromStr for SearchStrategyOption {
    type Err = String;

    /// Parses `dynamic`, `set` or `iterative`, ignoring case.
    ///
    /// # Errors
    ///
    /// Returns a message naming the rejected input and listing the accepted
    /// values when `s` is none of them.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s.to_lowercase().as_str() {
            "dynamic" => Ok(SearchStrategyOption(CriterionImplementationStrategy::Dynamic)),
            "set" => Ok(SearchStrategyOption(CriterionImplementationStrategy::OnlySetBased)),
            "iterative" => Ok(SearchStrategyOption(CriterionImplementationStrategy::OnlyIterative)),
            // The message must go through `format!`: a plain string literal
            // would print the placeholder verbatim instead of the input.
            _ => Err(format!(
                "could not parse {} as a criterion implementation strategy, available options are `dynamic`, `set`, and `iterative`",
                s
            )),
        }
    }
}
impl Display for SearchStrategyOption {
    /// Writes the strategy under the same name that `FromStr` accepts for it
    /// (`iterative`, `set` or `dynamic`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self.0 {
            CriterionImplementationStrategy::OnlyIterative => "iterative",
            CriterionImplementationStrategy::OnlySetBased => "set",
            CriterionImplementationStrategy::Dynamic => "dynamic",
        };
        Display::fmt(name, f)
    }
}
// Arguments of the search subcommand.
// (Plain `//` comments: structopt would turn `///` into help text.)
#[derive(Debug, StructOpt)]
struct Search {
// Query text (positional argument); no query is set on the search when absent.
query: Option<String>,
// Filter expression, parsed with `milli::Filter::from_str` before searching.
#[structopt(short, long)]
filter: Option<String>,
// Passed to the search's `offset` when present.
#[structopt(short, long)]
offset: Option<usize>,
// Passed to the search's `limit` when present.
#[structopt(short, long)]
limit: Option<usize>,
// Read one query per line from stdin instead of using `query`
// (the two are declared as conflicting).
#[structopt(short, long, conflicts_with = "query")]
interactive: bool,
// Criterion implementation strategy; see `SearchStrategyOption` for accepted values.
#[structopt(short, long)]
strategy: Option<SearchStrategyOption>,
}
impl Performer for Search {
    /// Runs the search subcommand against `index`.
    ///
    /// In interactive mode a `> ` prompt is written to stderr and one query
    /// is read per line of stdin until end of input; otherwise the single
    /// `query` given on the command line (if any) is executed.
    ///
    /// # Errors
    ///
    /// Returns any error raised by a search, by serializing the hits, by
    /// flushing stdout, or by reading a line from stdin.
    fn perform(self, index: milli::Index) -> Result<()> {
        if !self.interactive {
            return self.search_and_print(&index, &self.query);
        }

        let stdin = std::io::stdin();
        let mut lines = stdin.lock().lines();
        loop {
            eprint!("> ");
            // Push out any buffered results before waiting for the next query.
            std::io::stdout().flush()?;
            match lines.next() {
                // A failed read is reported instead of being treated as end of input.
                Some(line) => self.search_and_print(&index, &Some(line?))?,
                None => break,
            }
        }

        Ok(())
    }
}

impl Search {
    /// Executes one search for `query` with this command's filter, offset,
    /// limit and strategy, prints the hits as pretty JSON on stdout and the
    /// number of results with the elapsed time on stderr.
    fn search_and_print(&self, index: &milli::Index, query: &Option<String>) -> Result<()> {
        let now = Instant::now();
        let jsons = Self::perform_single_search(
            index,
            query,
            &self.filter,
            &self.offset,
            &self.limit,
            &self.strategy,
        )?;
        let time = now.elapsed();

        let hits = serde_json::to_string_pretty(&jsons)?;
        println!("{}", hits);
        eprintln!("found {} results in {:.02?}", jsons.len(), time);

        Ok(())
    }
}
impl Search {
fn perform_single_search(
index: &milli::Index,
query: &Option<String>,
filter: &Option<String>,
offset: &Option<usize>,
limit: &Option<usize>,
strategy: &Option<SearchStrategyOption>,
) -> Result<Vec<Object>> {
let txn = index.read_txn()?;
let mut search = index.search(&txn);
if let Some(ref query) = query {
search.query(query);
}
if let Some(ref filter) = filter {
if let Some(condition) = milli::Filter::from_str(filter)? {
search.filter(condition);
}
}
if let Some(offset) = offset {
search.offset(*offset);
}
if let Some(limit) = limit {
search.limit(*limit);
}
if let Some(strategy) = strategy {
search.criterion_implementation_strategy(strategy.0);
}
let result = search.execute()?;
let fields_ids_map = index.fields_ids_map(&txn)?;
let displayed_fields =
index.displayed_fields_ids(&txn)?.unwrap_or_else(|| fields_ids_map.ids().collect());
let documents = index.documents(&txn, result.documents_ids)?;
let mut jsons = Vec::new();
for (_, obkv) in documents {
let json = milli::obkv_to_json(&displayed_fields, &fields_ids_map, obkv)?;
jsons.push(json);
}
Ok(jsons)
}
}
// Arguments of the "settings update" subcommand. A setting is left untouched
// when its option is absent; when present but empty, `perform` resets it.
// (Plain `//` comments: structopt would turn `///` into help text.)
#[derive(Debug, StructOpt)]
struct SettingsUpdate {
// New filterable fields; an empty list resets them.
#[structopt(long)]
filterable_attributes: Option<Vec<String>>,
// New criteria, each parsed from its string form; an empty list resets them.
#[structopt(long)]
criteria: Option<Vec<String>>,
// New exact attributes; an empty list resets them.
#[structopt(long)]
exact_attributes: Option<Vec<String>>,
// New distinct field; an empty string resets it.
#[structopt(long)]
distinct_attribute: Option<String>,
}
impl Performer for SettingsUpdate {
    /// Applies the requested settings changes to `index` in one write
    /// transaction and commits it.
    ///
    /// For every option that was provided, a non-empty value replaces the
    /// current setting and an empty value resets it. Options that were not
    /// provided leave their setting untouched.
    fn perform(self, index: milli::Index) -> Result<()> {
        let Self { filterable_attributes, criteria, exact_attributes, distinct_attribute } = self;

        let mut wtxn = index.write_txn()?;
        let config = IndexerConfig { log_every_n: Some(100), ..Default::default() };
        let mut builder = milli::update::Settings::new(&mut wtxn, &index, &config);

        if let Some(fields) = filterable_attributes {
            if fields.is_empty() {
                builder.reset_filterable_fields();
            } else {
                builder.set_filterable_fields(fields.into_iter().collect());
            }
        }

        if let Some(names) = criteria {
            if names.is_empty() {
                builder.reset_criteria();
            } else {
                // Stop at the first criterion that cannot be parsed.
                let parsed = names.iter().map(|name| name.parse()).collect::<Result<_, _>>()?;
                builder.set_criteria(parsed);
            }
        }

        if let Some(attributes) = exact_attributes {
            if attributes.is_empty() {
                builder.reset_exact_attributes();
            } else {
                builder.set_exact_attributes(attributes.into_iter().collect());
            }
        }

        if let Some(field) = distinct_attribute {
            if field.is_empty() {
                builder.reset_distinct_field();
            } else {
                builder.set_distinct_field(field);
            }
        }

        // One hidden progress bar per indexing step, for `indexing_callback`.
        let progesses = MultiProgress::new();
        let bars: Vec<_> = (0..4).map(|_| progesses.add(ProgressBar::hidden())).collect();

        builder.execute(|step| indexing_callback(step, &bars), || false)?;

        wtxn.commit()?;
        Ok(())
    }
}