diff --git a/Cargo.lock b/Cargo.lock index 987040642..a3152498c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -289,30 +289,12 @@ dependencies = [ "alloc-no-stdlib", ] -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - [[package]] name = "anes" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" -[[package]] -name = "ansi_term" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" -dependencies = [ - "winapi", -] - [[package]] name = "anyhow" version = "1.0.68" @@ -697,20 +679,6 @@ dependencies = [ "whatlang", ] -[[package]] -name = "chrono" -version = "0.4.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" -dependencies = [ - "iana-time-zone", - "js-sys", - "num-integer", - "num-traits", - "wasm-bindgen", - "winapi", -] - [[package]] name = "ciborium" version = "0.2.0" @@ -747,21 +715,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "clap" -version = "2.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" -dependencies = [ - "ansi_term", - "atty", - "bitflags", - "strsim 0.8.0", - "textwrap 0.11.0", - "unicode-width", - "vec_map", -] - [[package]] name = "clap" version = "3.2.23" @@ -774,9 +727,9 @@ dependencies = [ "clap_lex 0.2.4", "indexmap", "once_cell", - "strsim 0.10.0", + "strsim", "termcolor", - "textwrap 0.16.0", + "textwrap", ] [[package]] @@ -790,7 +743,7 @@ dependencies = [ "clap_lex 0.3.0", "is-terminal", "once_cell", - "strsim 0.10.0", + "strsim", "termcolor", ] @@ -800,7 +753,7 @@ version = "3.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65" dependencies = [ - "heck 0.4.0", + "heck", "proc-macro-error", "proc-macro2 1.0.49", "quote 1.0.23", @@ -813,7 +766,7 @@ version = "4.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0177313f9f02afc995627906bbd8967e2be069f5261954222dac78290c2b9014" dependencies = [ - "heck 0.4.0", + "heck", "proc-macro-error", "proc-macro2 1.0.49", "quote 1.0.23", @@ -838,61 +791,6 @@ dependencies = [ "os_str_bytes", ] -[[package]] -name = "cli" -version = "1.0.0" -dependencies = [ - "bimap", - "byte-unit", - "color-eyre", - "csv", - "eyre", - "indicatif", - "milli 1.0.0", - "mimalloc", - "serde", - "serde_json", - "stderrlog", - "structopt", -] - -[[package]] -name = "codespan-reporting" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" -dependencies = [ - "termcolor", - "unicode-width", -] - -[[package]] -name = "color-eyre" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a667583cca8c4f8436db8de46ea8233c42a7d9ae424a82d338f2e4675229204" -dependencies = [ - "backtrace", - "color-spantrace", - "eyre", - "indenter", - "once_cell", - "owo-colors", - "tracing-error", -] - -[[package]] -name = "color-spantrace" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ba75b3d9449ecdccb27ecbc479fdc0b87fa2dd43d2f8298f9bf0e59aacc8dce" -dependencies = [ - "once_cell", - "owo-colors", - "tracing-core", - "tracing-error", -] - [[package]] name = "concat-arrays" version = "0.1.2" @@ -913,7 +811,6 @@ dependencies = [ "encode_unicode", "lazy_static", "libc", - "unicode-width", "windows-sys", ] @@ -1135,50 +1032,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "cxx" -version = "1.0.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d1075c37807dcf850c379432f0df05ba52cc30f279c5cfc43cc221ce7f8579" -dependencies = [ - "cc", - "cxxbridge-flags", - "cxxbridge-macro", - "link-cplusplus", -] - -[[package]] -name = "cxx-build" -version = "1.0.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5044281f61b27bc598f2f6647d480aed48d2bf52d6eb0b627d84c0361b17aa70" -dependencies = [ - "cc", - "codespan-reporting", - "once_cell", - "proc-macro2 1.0.49", - "quote 1.0.23", - "scratch", - "syn 1.0.107", -] - -[[package]] -name = "cxxbridge-flags" -version = "1.0.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61b50bc93ba22c27b0d31128d2d130a0a6b3d267ae27ef7e4fae2167dfe8781c" - -[[package]] -name = "cxxbridge-macro" -version = "1.0.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e61fda7e62115119469c7b3591fd913ecca96fb766cfd3f2e2502ab7bc87a5" -dependencies = [ - "proc-macro2 1.0.49", - "quote 1.0.23", - "syn 1.0.107", -] - [[package]] name = "darling" version = "0.14.2" @@ -1199,7 +1052,7 @@ dependencies = [ "ident_case", "proc-macro2 1.0.49", "quote 1.0.23", - "strsim 0.10.0", + "strsim", "syn 1.0.107", ] @@ -1495,16 +1348,6 @@ dependencies = [ "libc", ] -[[package]] -name = "eyre" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c2b6b5a29c02cdc822728b7d7b8ae1bab3e3b05d44522770ddd49722eeac7eb" -dependencies = [ - "indenter", - "once_cell", -] - [[package]] name = "fastrand" version = "1.8.0" @@ -1907,15 +1750,6 @@ dependencies = [ "stable_deref_trait", ] -[[package]] -name = "heck" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] - [[package]] name = "heck" version = "0.4.0" @@ -2066,30 +1900,6 @@ dependencies = [ "tokio-rustls", ] -[[package]] -name = "iana-time-zone" -version = "0.1.53" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "winapi", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" -dependencies = [ - "cxx", - "cxx-build", -] - [[package]] name = "ident_case" version = "1.0.1" @@ -2106,12 +1916,6 @@ dependencies = [ "unicode-normalization", ] -[[package]] -name = "indenter" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" - [[package]] name = "index-scheduler" version = "1.0.0" @@ -2152,18 +1956,6 @@ dependencies = [ "serde", ] -[[package]] -name = "indicatif" -version = "0.17.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cef509aa9bc73864d6756f0d34d35504af3cf0844373afe9b8669a5b8005a729" -dependencies = [ - "console", - "number_prefix", - "portable-atomic", - "unicode-width", -] - [[package]] name = "insta" version = "1.24.1" @@ -2544,15 +2336,6 @@ dependencies = [ "yada", ] -[[package]] -name = "link-cplusplus" -version = "1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" -dependencies = [ - "cc", -] - [[package]] name = "linked-hash-map" version = "0.5.6" @@ -3071,12 +2854,6 @@ dependencies = [ "libc", ] -[[package]] -name = "number_prefix" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" - [[package]] name = "object" version = "0.27.1" @@ -3140,12 +2917,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" -[[package]] -name = "owo-colors" -version = "3.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" - [[package]] name = "page_size" version = "0.4.2" @@ -3399,12 +3170,6 @@ dependencies = [ "plotters-backend", ] -[[package]] -name = "portable-atomic" -version = "0.3.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26f6a7b87c2e435a3241addceeeff740ff8b7e76b74c13bf9acb17fa454ea00b" - [[package]] name = "ppv-lite86" version = "0.2.17" @@ -3836,12 +3601,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" -[[package]] -name = "scratch" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2" - [[package]] name = "sct" version = "0.7.0" @@ -3958,15 +3717,6 @@ dependencies = [ "digest", ] -[[package]] -name = "sharded-slab" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" -dependencies = [ - "lazy_static", -] - [[package]] name = "signal-hook-registry" version = "1.4.0" @@ -4090,55 +3840,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" -[[package]] -name = "stderrlog" -version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69a26bbf6de627d389164afa9783739b56746c6c72c4ed16539f4ff54170327b" -dependencies = [ - "atty", - "chrono", - "log", - "termcolor", - "thread_local", -] - -[[package]] -name = "strsim" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" - [[package]] name = "strsim" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" -[[package]] -name = "structopt" -version = "0.3.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" -dependencies = [ - "clap 2.34.0", - "lazy_static", - "structopt-derive", -] - -[[package]] -name = "structopt-derive" -version = "0.4.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" -dependencies = [ - "heck 0.3.3", - "proc-macro-error", - "proc-macro2 1.0.49", - "quote 1.0.23", - "syn 1.0.107", -] - [[package]] name = "subtle" version = "2.4.1" @@ -4246,15 +3953,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", -] - [[package]] name = "textwrap" version = "0.16.0" @@ -4281,15 +3979,6 @@ dependencies = [ "syn 1.0.107", ] -[[package]] -name = "thread_local" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" -dependencies = [ - "once_cell", -] - [[package]] name = "time" version = "0.3.17" @@ -4443,28 +4132,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" dependencies = [ "once_cell", - "valuable", -] - -[[package]] -name = "tracing-error" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d686ec1c0f384b1277f097b2f279a2ecc11afe8c133c1aabf036a27cb4cd206e" -dependencies = [ - "tracing", - "tracing-subscriber", -] - -[[package]] -name = "tracing-subscriber" -version = "0.3.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6176eae26dd70d0c919749377897b54a9276bd7061339665dd68777926b5a70" -dependencies = [ - "sharded-slab", - "thread_local", - "tracing-core", ] [[package]] @@ -4587,24 +4254,12 @@ dependencies = [ "serde", ] -[[package]] -name = "valuable" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" - [[package]] name = "vcpkg" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - [[package]] name = "vergen" version = "7.5.0" diff --git a/Cargo.toml b/Cargo.toml index 05d665990..a76827de0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,8 +13,7 @@ members = [ "filter-parser", "flatten-serde-json", "json-depth-checker", - "benchmarks", - "cli" + "benchmarks" ] [profile.release] diff --git a/cli/Cargo.toml b/cli/Cargo.toml deleted file mode 100644 index 446c924df..000000000 --- a/cli/Cargo.toml +++ /dev/null @@ -1,23 +0,0 @@ -[package] -name = "cli" -version = "1.0.0" -edition = "2018" -description = "A CLI to interact with a milli index" -publish = false - -[dependencies] -bimap = "0.6.2" -byte-unit = { version = "4.0.14", default-features = false, features = ["std", "serde"] } -color-eyre = "0.6.2" -csv = "1.1.6" -eyre = "0.6.8" -indicatif = "0.17.1" -milli = { path = "../milli", default-features = false } -mimalloc = { version = "0.1.29", default-features = false } -serde = "1.0.145" -serde_json = "1.0.85" -stderrlog = "0.5.3" -structopt = "0.3.26" - -[features] -default = ["milli/default"] diff --git a/cli/src/main.rs b/cli/src/main.rs deleted file mode 100644 index fb4d41ee2..000000000 --- a/cli/src/main.rs +++ /dev/null @@ -1,559 +0,0 @@ -use std::collections::BTreeMap; -use std::fmt::Display; -use std::fs::File; -use std::io::{stdin, BufRead, BufReader, Cursor, Read, Write}; -use std::path::PathBuf; -use std::str::FromStr; -use std::time::{Duration, Instant}; - -use byte_unit::Byte; -use eyre::Result; -use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; -use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; -use milli::update::UpdateIndexingStep::{ - ComputeIdsAndMergeDocuments, IndexDocuments, MergeDataIntoFinalDatabase, RemapDocumentAddition, -}; -use milli::update::{self, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig}; -use milli::{heed, CriterionImplementationStrategy, Index, Object}; -use structopt::StructOpt; - -#[global_allocator] -static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; - -#[derive(Debug, StructOpt)] -#[structopt(name = "Milli CLI", about = "A simple CLI to manipulate a milli index.")] -struct Cli { - #[structopt(short, long, default_value = ".")] - index_path: PathBuf, - #[structopt(short = "s", long, default_value = "100GiB")] - index_size: Byte, - /// Verbose mode (-v, -vv, -vvv, etc.) - #[structopt(short, long, parse(from_occurrences))] - verbose: usize, - #[structopt(subcommand)] - subcommand: Command, -} - -#[derive(Debug, StructOpt)] -enum Command { - Documents { - #[structopt(subcommand)] - cmd: Documents, - }, - Search(Search), - Settings { - #[structopt(subcommand)] - cmd: Settings, - }, -} - -impl Performer for Command { - fn perform(self, index: Index) -> Result<()> { - match self { - Command::Documents { cmd } => cmd.perform(index), - Command::Search(cmd) => cmd.perform(index), - Command::Settings { cmd } => cmd.perform(index), - } - } -} - -#[derive(Debug, StructOpt)] -enum Settings { - Update(SettingsUpdate), - Show, -} - -impl Settings { - fn show(&self, index: Index) -> Result<()> { - let txn = index.read_txn()?; - let displayed_attributes = index - .displayed_fields(&txn)? - .map(|fields| fields.into_iter().map(String::from).collect()); - - let searchable_attributes: Option> = index - .searchable_fields(&txn)? - .map(|fields| fields.into_iter().map(String::from).collect()); - - let filterable_attributes: Vec<_> = index.filterable_fields(&txn)?.into_iter().collect(); - - let sortable_attributes: Vec<_> = index.sortable_fields(&txn)?.into_iter().collect(); - - let criteria: Vec<_> = index.criteria(&txn)?.into_iter().map(|c| c.to_string()).collect(); - - let stop_words = index - .stop_words(&txn)? - .map(|stop_words| -> Result> { - Ok(stop_words.stream().into_strs()?.into_iter().collect()) - }) - .transpose()? - .unwrap_or_default(); - let distinct_field = index.distinct_field(&txn)?.map(String::from); - - // in milli each word in the synonyms map were split on their separator. Since we lost - // this information we are going to put space between words. - let synonyms: BTreeMap<_, Vec<_>> = index - .synonyms(&txn)? - .iter() - .map(|(key, values)| { - (key.join(" "), values.iter().map(|value| value.join(" ")).collect()) - }) - .collect(); - - let exact_attributes = index.exact_attributes(&txn)?; - - println!( - "displayed attributes:\n\t{}\nsearchable attributes:\n\t{}\nfilterable attributes:\n\t{}\nsortable attributes:\n\t{}\ncriterion:\n\t{}\nstop words:\n\t{}\ndistinct fields:\n\t{}\nsynonyms:\n\t{}\nexact attributes:\n\t{}\n", - displayed_attributes.unwrap_or_else(|| vec!["*".to_owned()]).join("\n\t"), - searchable_attributes.unwrap_or_else(|| vec!["*".to_owned()]).join("\n\t"), - filterable_attributes.join("\n\t"), - sortable_attributes.join("\n\t"), - criteria.join("\n\t"), - stop_words.join("\n\t"), - distinct_field.unwrap_or_default(), - synonyms.into_iter().map(|(k, v)| format!("\n\t{}:\n{:?}", k, v)).collect::(), - exact_attributes.join("\n\t"), - ); - Ok(()) - } -} - -impl Performer for Settings { - fn perform(self, index: Index) -> Result<()> { - match self { - Settings::Update(update) => update.perform(index), - Settings::Show => self.show(index), - } - } -} - -#[derive(Debug, StructOpt)] -enum Documents { - Add(DocumentAddition), -} - -impl Performer for Documents { - fn perform(self, index: Index) -> Result<()> { - match self { - Self::Add(addition) => addition.perform(index), - } - } -} - -trait Performer { - fn perform(self, index: Index) -> Result<()>; -} - -fn setup(opt: &Cli) -> Result<()> { - color_eyre::install()?; - stderrlog::new() - .verbosity(opt.verbose) - .show_level(false) - .timestamp(stderrlog::Timestamp::Off) - .init()?; - Ok(()) -} - -fn main() -> Result<()> { - let command = Cli::from_args(); - - setup(&command)?; - - let mut options = heed::EnvOpenOptions::new(); - options.map_size(command.index_size.get_bytes() as usize); - let index = milli::Index::new(options, command.index_path)?; - - command.subcommand.perform(index)?; - - Ok(()) -} - -#[derive(Debug)] -enum DocumentAdditionFormat { - Csv, - Json, - Jsonl, -} - -impl FromStr for DocumentAdditionFormat { - type Err = eyre::Error; - - fn from_str(s: &str) -> Result { - match s { - "csv" => Ok(Self::Csv), - "jsonl" => Ok(Self::Jsonl), - "json" => Ok(Self::Json), - other => eyre::bail!("invalid format: {}", other), - } - } -} - -#[derive(Debug, StructOpt)] -struct DocumentAddition { - #[structopt(short, long, default_value = "json", possible_values = &["csv", "jsonl", "json"])] - format: DocumentAdditionFormat, - /// Path to the update file, if not present, will read from stdin. - #[structopt(short, long)] - path: Option, - /// Specify the primary key. - #[structopt(long)] - primary: Option, - /// Whether to generate missing document ids. - #[structopt(short, long)] - autogen_docids: bool, - /// Whether to update or replace the documents if they already exist. - #[structopt(short, long)] - update_documents: bool, -} - -impl Performer for DocumentAddition { - fn perform(self, index: milli::Index) -> Result<()> { - let reader: Box = match self.path { - Some(ref path) => { - let file = File::open(path)?; - Box::new(file) - } - None => Box::new(stdin()), - }; - - println!("parsing documents..."); - - let reader = BufReader::new(reader); - - let documents = match self.format { - DocumentAdditionFormat::Csv => documents_from_csv(reader)?, - DocumentAdditionFormat::Json => documents_from_json(reader)?, - DocumentAdditionFormat::Jsonl => documents_from_jsonl(reader)?, - }; - - let reader = DocumentsBatchReader::from_reader(Cursor::new(documents))?; - - println!("Adding {} documents to the index.", reader.documents_count()); - - let mut txn = index.write_txn()?; - let config = milli::update::IndexerConfig { log_every_n: Some(100), ..Default::default() }; - let update_method = if self.update_documents { - IndexDocumentsMethod::UpdateDocuments - } else { - IndexDocumentsMethod::ReplaceDocuments - }; - - if let Some(primary) = self.primary { - let mut builder = update::Settings::new(&mut txn, &index, &config); - builder.set_primary_key(primary); - builder.execute(|_| (), || false).unwrap(); - } - - let indexing_config = IndexDocumentsConfig { - update_method, - autogenerate_docids: self.autogen_docids, - ..Default::default() - }; - let mut bars = Vec::new(); - let progesses = MultiProgress::new(); - for _ in 0..4 { - let bar = ProgressBar::hidden(); - let bar = progesses.add(bar); - bars.push(bar); - } - let addition = milli::update::IndexDocuments::new( - &mut txn, - &index, - &config, - indexing_config, - |step| indexing_callback(step, &bars), - || false, - ) - .unwrap(); - let (addition, user_error) = addition.add_documents(reader)?; - if let Err(error) = user_error { - return Err(error.into()); - } - - let result = addition.execute()?; - - txn.commit()?; - - println!("{:?}", result); - Ok(()) - } -} - -fn indexing_callback(step: milli::update::UpdateIndexingStep, bars: &[ProgressBar]) { - let step_index = step.step(); - let bar = &bars[step_index]; - if step_index > 0 { - let prev = &bars[step_index - 1]; - if !prev.is_finished() { - prev.disable_steady_tick(); - prev.finish(); - } - } - - let style = ProgressStyle::default_bar() - .progress_chars("##-") - .template("[eta: {eta_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}") - .unwrap(); - - match step { - RemapDocumentAddition { documents_seen } => { - bar.set_style(ProgressStyle::default_spinner()); - bar.set_message(format!("remapped {} documents so far.", documents_seen)); - } - ComputeIdsAndMergeDocuments { documents_seen, total_documents } => { - bar.set_style(style); - bar.set_length(total_documents as u64); - bar.set_message("Merging documents..."); - bar.set_position(documents_seen as u64); - } - IndexDocuments { documents_seen, total_documents } => { - bar.set_style(style); - bar.set_length(total_documents as u64); - bar.set_message("Indexing documents..."); - bar.set_position(documents_seen as u64); - } - MergeDataIntoFinalDatabase { databases_seen, total_databases } => { - bar.set_style(style); - bar.set_length(total_databases as u64); - bar.set_message("Merging databases..."); - bar.set_position(databases_seen as u64); - } - } - bar.enable_steady_tick(Duration::from_millis(200)); -} - -fn documents_from_jsonl(reader: impl Read) -> Result> { - let mut documents = DocumentsBatchBuilder::new(Vec::new()); - let reader = BufReader::new(reader); - - for result in serde_json::Deserializer::from_reader(reader).into_iter::() { - let object = result?; - documents.append_json_object(&object)?; - } - - documents.into_inner().map_err(Into::into) -} - -fn documents_from_json(reader: impl Read) -> Result> { - let mut documents = DocumentsBatchBuilder::new(Vec::new()); - - documents.append_json_array(reader)?; - - documents.into_inner().map_err(Into::into) -} - -fn documents_from_csv(reader: impl Read) -> Result> { - let csv = csv::Reader::from_reader(reader); - - let mut documents = DocumentsBatchBuilder::new(Vec::new()); - documents.append_csv(csv)?; - - documents.into_inner().map_err(Into::into) -} - -#[derive(Debug, Clone, Copy)] -struct SearchStrategyOption(CriterionImplementationStrategy); -impl FromStr for SearchStrategyOption { - type Err = String; - fn from_str(s: &str) -> Result { - match s.to_lowercase().as_str() { - "dynamic" => Ok(SearchStrategyOption(CriterionImplementationStrategy::Dynamic)), - "set" => Ok(SearchStrategyOption(CriterionImplementationStrategy::OnlySetBased)), - "iterative" => Ok(SearchStrategyOption(CriterionImplementationStrategy::OnlyIterative)), - _ => Err("could not parse {s} as a criterion implementation strategy, available options are `dynamic`, `set`, and `iterative`".to_owned()), - } - } -} -impl Display for SearchStrategyOption { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self.0 { - CriterionImplementationStrategy::OnlyIterative => Display::fmt("iterative", f), - CriterionImplementationStrategy::OnlySetBased => Display::fmt("set", f), - CriterionImplementationStrategy::Dynamic => Display::fmt("dynamic", f), - } - } -} - -#[derive(Debug, StructOpt)] -struct Search { - query: Option, - #[structopt(short, long)] - filter: Option, - #[structopt(short, long)] - offset: Option, - #[structopt(short, long)] - limit: Option, - #[structopt(short, long, conflicts_with = "query")] - interactive: bool, - #[structopt(short, long)] - strategy: Option, -} - -impl Performer for Search { - fn perform(self, index: milli::Index) -> Result<()> { - if self.interactive { - let stdin = std::io::stdin(); - let mut lines = stdin.lock().lines(); - loop { - eprint!("> "); - std::io::stdout().flush()?; - match lines.next() { - Some(Ok(line)) => { - let now = Instant::now(); - let jsons = Self::perform_single_search( - &index, - &Some(line), - &self.filter, - &self.offset, - &self.limit, - &self.strategy, - )?; - - let time = now.elapsed(); - - let hits = serde_json::to_string_pretty(&jsons)?; - - println!("{}", hits); - - eprintln!("found {} results in {:.02?}", jsons.len(), time); - } - _ => break, - } - } - } else { - let now = Instant::now(); - let jsons = Self::perform_single_search( - &index, - &self.query, - &self.filter, - &self.offset, - &self.limit, - &self.strategy, - )?; - - let time = now.elapsed(); - - let hits = serde_json::to_string_pretty(&jsons)?; - - println!("{}", hits); - eprintln!("found {} results in {:.02?}", jsons.len(), time); - } - - Ok(()) - } -} - -impl Search { - fn perform_single_search( - index: &milli::Index, - query: &Option, - filter: &Option, - offset: &Option, - limit: &Option, - strategy: &Option, - ) -> Result> { - let txn = index.read_txn()?; - let mut search = index.search(&txn); - - if let Some(ref query) = query { - search.query(query); - } - - if let Some(ref filter) = filter { - if let Some(condition) = milli::Filter::from_str(filter)? { - search.filter(condition); - } - } - - if let Some(offset) = offset { - search.offset(*offset); - } - - if let Some(limit) = limit { - search.limit(*limit); - } - if let Some(strategy) = strategy { - search.criterion_implementation_strategy(strategy.0); - } - - let result = search.execute()?; - - let fields_ids_map = index.fields_ids_map(&txn)?; - let displayed_fields = - index.displayed_fields_ids(&txn)?.unwrap_or_else(|| fields_ids_map.ids().collect()); - let documents = index.documents(&txn, result.documents_ids)?; - let mut jsons = Vec::new(); - for (_, obkv) in documents { - let json = milli::obkv_to_json(&displayed_fields, &fields_ids_map, obkv)?; - jsons.push(json); - } - - Ok(jsons) - } -} - -#[derive(Debug, StructOpt)] -struct SettingsUpdate { - #[structopt(long)] - filterable_attributes: Option>, - #[structopt(long)] - criteria: Option>, - #[structopt(long)] - exact_attributes: Option>, - #[structopt(long)] - distinct_attribute: Option, -} - -impl Performer for SettingsUpdate { - fn perform(self, index: milli::Index) -> Result<()> { - let mut txn = index.write_txn()?; - - let config = IndexerConfig { log_every_n: Some(100), ..Default::default() }; - - let mut update = milli::update::Settings::new(&mut txn, &index, &config); - - if let Some(ref filterable_attributes) = self.filterable_attributes { - if !filterable_attributes.is_empty() { - update.set_filterable_fields(filterable_attributes.iter().cloned().collect()); - } else { - update.reset_filterable_fields(); - } - } - - if let Some(criteria) = self.criteria { - if !criteria.is_empty() { - update.set_criteria(criteria.iter().map(|c| c.parse()).collect::>()?); - } else { - update.reset_criteria(); - } - } - - if let Some(exact_attributes) = self.exact_attributes { - if !exact_attributes.is_empty() { - update.set_exact_attributes(exact_attributes.into_iter().collect()); - } else { - update.reset_exact_attributes(); - } - } - - if let Some(distinct_attr) = self.distinct_attribute { - if !distinct_attr.is_empty() { - update.set_distinct_field(distinct_attr); - } else { - update.reset_distinct_field(); - } - } - - let mut bars = Vec::new(); - let progesses = MultiProgress::new(); - for _ in 0..4 { - let bar = ProgressBar::hidden(); - let bar = progesses.add(bar); - bars.push(bar); - } - - update.execute(|step| indexing_callback(step, &bars), || false)?; - - txn.commit()?; - Ok(()) - } -}