Merge pull request #81 from meilisearch/smart-workspace

Change the project to become a workspace
This commit is contained in:
Clément Renault 2021-02-14 19:02:00 +01:00 committed by GitHub
commit 5d0ac3e3e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
58 changed files with 1250 additions and 2744 deletions

3
.gitignore vendored
View File

@ -2,9 +2,6 @@
/target /target
/Cargo.lock /Cargo.lock
# the sub target folder
http-ui/target
# datasets # datasets
*.csv *.csv
*.mmdb *.mmdb

1210
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,70 +1,6 @@
[package] [workspace]
name = "milli" members = ["milli", "http-ui", "infos", "search"]
version = "0.1.0" default-members = ["milli"]
authors = ["Kerollmops <clement@meilisearch.com>"]
edition = "2018"
[dependencies]
anyhow = "1.0.28"
bstr = "0.2.13"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
byteorder = "1.3.4"
crossbeam-channel = "0.5.0"
csv = "1.1.3"
either = "1.6.1"
flate2 = "1.0.17"
fst = "0.4.5"
fxhash = "0.2.1"
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
heed = { version = "0.10.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
human_format = "1.0.3"
jemallocator = "0.3.2"
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
linked-hash-map = "0.5.3"
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" }
memmap = "0.7.0"
near-proximity = { git = "https://github.com/Kerollmops/plane-sweep-proximity", rev = "6608205" }
num-traits = "0.2.14"
obkv = "0.1.0"
once_cell = "1.4.0"
ordered-float = "2.0.0"
rayon = "1.3.1"
regex = "1.4.2"
ringtail = "0.3.0"
roaring = "0.6.4"
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0.59", features = ["preserve_order"] }
slice-group-by = "0.2.6"
smallstr = { version = "0.2.0", features = ["serde"] }
smallvec = "1.4.0"
structopt = { version = "0.3.14", default-features = false, features = ["wrap_help"] }
tempfile = "3.1.0"
uuid = { version = "0.8.1", features = ["v4"] }
# facet filter parser
pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" }
pest_derive = "2.1.0"
# documents words self-join
itertools = "0.9.0"
# logging
log = "0.4.11"
stderrlog = "0.5.0"
[dev-dependencies]
criterion = "0.3.3"
maplit = "1.0.2"
[build-dependencies]
fst = "0.4.5"
[features]
default = []
[[bench]]
name = "search"
harness = false
[profile.release] [profile.release]
debug = true debug = true

2530
http-ui/Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -12,7 +12,7 @@ grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
heed = "0.10.5" heed = "0.10.5"
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" }
memmap = "0.7.0" memmap = "0.7.0"
milli = { path = ".." } milli = { path = "../milli" }
once_cell = "1.4.1" once_cell = "1.4.1"
rayon = "1.5.0" rayon = "1.5.0"
structopt = { version = "0.3.14", default-features = false, features = ["wrap_help"] } structopt = { version = "0.3.14", default-features = false, features = ["wrap_help"] }
@ -34,3 +34,6 @@ warp = "0.2.2"
log = "0.4.11" log = "0.4.11"
stderrlog = "0.5.0" stderrlog = "0.5.0"
fst = "0.4.5" fst = "0.4.5"
# Temporary fix for bitvec, remove once fixed. (https://github.com/bitvecto-rs/bitvec/issues/105)
funty = "=1.1.0"

17
infos/Cargo.toml Normal file
View File

@ -0,0 +1,17 @@
[package]
name = "infos"
version = "0.1.0"
authors = ["Clément Renault <clement@meilisearch.com>"]
edition = "2018"
[dependencies]
anyhow = "1.0.28"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
csv = "1.1.3"
heed = "0.10.5"
jemallocator = "0.3.2"
milli = { path = "../milli" }
roaring = "0.6.4"
serde_json = "1.0.59"
stderrlog = "0.5.0"
structopt = { version = "0.3.14", default-features = false }

View File

@ -4,12 +4,16 @@ use std::{str, io, fmt};
use anyhow::Context; use anyhow::Context;
use byte_unit::Byte; use byte_unit::Byte;
use crate::Index;
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use milli::Index;
use structopt::StructOpt; use structopt::StructOpt;
use Command::*; use Command::*;
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
const MAIN_DB_NAME: &str = "main"; const MAIN_DB_NAME: &str = "main";
const WORD_DOCIDS_DB_NAME: &str = "word-docids"; const WORD_DOCIDS_DB_NAME: &str = "word-docids";
const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions"; const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions";
@ -153,7 +157,18 @@ enum Command {
PatchToNewExternalIds, PatchToNewExternalIds,
} }
pub fn run(opt: Opt) -> anyhow::Result<()> { fn main() -> Result<(), ()> {
let opt = Opt::from_args();
match run(opt) {
Ok(()) => Ok(()),
Err(e) => {
eprintln!("{}", e);
Err(())
},
}
}
fn run(opt: Opt) -> anyhow::Result<()> {
stderrlog::new() stderrlog::new()
.verbosity(opt.verbose) .verbosity(opt.verbose)
.show_level(false) .show_level(false)
@ -204,7 +219,7 @@ fn patch_to_new_external_ids(index: &Index, wtxn: &mut heed::RwTxn) -> anyhow::R
let documents_ids = documents_ids.to_owned(); let documents_ids = documents_ids.to_owned();
index.main.put::<_, ByteSlice, ByteSlice>( index.main.put::<_, ByteSlice, ByteSlice>(
wtxn, wtxn,
crate::index::HARD_EXTERNAL_DOCUMENTS_IDS_KEY.as_bytes(), milli::index::HARD_EXTERNAL_DOCUMENTS_IDS_KEY.as_bytes(),
&documents_ids, &documents_ids,
)?; )?;
index.main.delete::<_, ByteSlice>(wtxn, USERS_IDS_DOCUMENTS_IDS)?; index.main.delete::<_, ByteSlice>(wtxn, USERS_IDS_DOCUMENTS_IDS)?;
@ -242,7 +257,7 @@ fn facet_values_iter<'txn, DC: 'txn, T>(
rtxn: &'txn heed::RoTxn, rtxn: &'txn heed::RoTxn,
db: heed::Database<heed::types::ByteSlice, DC>, db: heed::Database<heed::types::ByteSlice, DC>,
field_id: u8, field_id: u8,
facet_type: crate::facet::FacetType, facet_type: milli::facet::FacetType,
string_fn: impl Fn(&str) -> T + 'txn, string_fn: impl Fn(&str) -> T + 'txn,
float_fn: impl Fn(u8, f64, f64) -> T + 'txn, float_fn: impl Fn(u8, f64, f64) -> T + 'txn,
integer_fn: impl Fn(u8, i64, i64) -> T + 'txn, integer_fn: impl Fn(u8, i64, i64) -> T + 'txn,
@ -250,8 +265,8 @@ fn facet_values_iter<'txn, DC: 'txn, T>(
where where
DC: heed::BytesDecode<'txn>, DC: heed::BytesDecode<'txn>,
{ {
use crate::facet::FacetType; use milli::facet::FacetType;
use crate::heed_codec::facet::{ use milli::heed_codec::facet::{
FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec, FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec,
}; };
@ -504,7 +519,7 @@ fn export_words_fst(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
fn export_documents(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> { fn export_documents(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use std::io::{BufWriter, Write as _}; use std::io::{BufWriter, Write as _};
use crate::obkv_to_json; use milli::obkv_to_json;
let stdout = io::stdout(); let stdout = io::stdout();
let mut out = BufWriter::new(stdout); let mut out = BufWriter::new(stdout);
@ -548,7 +563,7 @@ fn total_docid_word_positions_size(index: &Index, rtxn: &heed::RoTxn) -> anyhow:
fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> { fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use heed::types::DecodeIgnore; use heed::types::DecodeIgnore;
use crate::{DocumentId, BEU32StrCodec}; use milli::{DocumentId, BEU32StrCodec};
let mut words_counts = Vec::new(); let mut words_counts = Vec::new();
let mut count = 0; let mut count = 0;
@ -587,7 +602,7 @@ fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::
fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> { fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use heed::types::DecodeIgnore; use heed::types::DecodeIgnore;
use crate::BoRoaringBitmapCodec; use milli::BoRoaringBitmapCodec;
let mut values_length = Vec::new(); let mut values_length = Vec::new();
let mut count = 0; let mut count = 0;
@ -639,7 +654,7 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu
use heed::types::ByteSlice; use heed::types::ByteSlice;
use heed::{Error, BytesDecode}; use heed::{Error, BytesDecode};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec}; use milli::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
fn compute_stats<'a, DC: BytesDecode<'a, DItem = RoaringBitmap>>( fn compute_stats<'a, DC: BytesDecode<'a, DItem = RoaringBitmap>>(
db: heed::PolyDatabase, db: heed::PolyDatabase,
@ -720,7 +735,7 @@ fn word_pair_proximities_docids(
) -> anyhow::Result<()> ) -> anyhow::Result<()>
{ {
use heed::types::ByteSlice; use heed::types::ByteSlice;
use crate::RoaringBitmapCodec; use milli::RoaringBitmapCodec;
let stdout = io::stdout(); let stdout = io::stdout();
let mut wtr = csv::Writer::from_writer(stdout.lock()); let mut wtr = csv::Writer::from_writer(stdout.lock());

60
milli/Cargo.toml Normal file
View File

@ -0,0 +1,60 @@
[package]
name = "milli"
version = "0.1.0"
authors = ["Kerollmops <clement@meilisearch.com>"]
edition = "2018"
[dependencies]
anyhow = "1.0.28"
bstr = "0.2.13"
byteorder = "1.3.4"
crossbeam-channel = "0.5.0"
csv = "1.1.3"
either = "1.6.1"
flate2 = "1.0.17"
fst = "0.4.5"
fxhash = "0.2.1"
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
heed = { version = "0.10.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
human_format = "1.0.3"
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
linked-hash-map = "0.5.3"
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" }
memmap = "0.7.0"
num-traits = "0.2.14"
obkv = "0.1.0"
once_cell = "1.4.0"
ordered-float = "2.0.0"
rayon = "1.3.1"
regex = "1.4.2"
roaring = "0.6.4"
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0.59", features = ["preserve_order"] }
smallstr = { version = "0.2.0", features = ["serde"] }
smallvec = "1.4.0"
tempfile = "3.1.0"
uuid = { version = "0.8.1", features = ["v4"] }
# facet filter parser
pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" }
pest_derive = "2.1.0"
# documents words self-join
itertools = "0.9.0"
# logging
log = "0.4.11"
[dev-dependencies]
criterion = "0.3.3"
maplit = "1.0.2"
[build-dependencies]
fst = "0.4.5"
[features]
default = []
[[bench]]
name = "search"
harness = false

View File

@ -3,15 +3,14 @@
mod criterion; mod criterion;
mod external_documents_ids; mod external_documents_ids;
mod fields_ids_map; mod fields_ids_map;
mod index;
mod mdfs; mod mdfs;
mod query_tokens; mod query_tokens;
mod search; mod search;
mod update_store; mod update_store;
pub mod facet; pub mod facet;
pub mod heed_codec; pub mod heed_codec;
pub mod index;
pub mod proximity; pub mod proximity;
pub mod subcommand;
pub mod update; pub mod update;
use std::borrow::Cow; use std::borrow::Cow;

16
search/Cargo.toml Normal file
View File

@ -0,0 +1,16 @@
[package]
name = "search"
version = "0.1.0"
authors = ["Clément Renault <clement@meilisearch.com>"]
edition = "2018"
[dependencies]
anyhow = "1.0.28"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
heed = "0.10.5"
jemallocator = "0.3.2"
log = "0.4.11"
milli = { path = "../milli" }
serde_json = "1.0.59"
stderrlog = "0.5.0"
structopt = { version = "0.3.14", default-features = false }

View File

@ -8,7 +8,11 @@ use heed::EnvOpenOptions;
use log::debug; use log::debug;
use structopt::StructOpt; use structopt::StructOpt;
use crate::{Index, obkv_to_json}; use milli::{Index, obkv_to_json};
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
#[derive(Debug, StructOpt)] #[derive(Debug, StructOpt)]
/// A simple search helper binary for the milli project. /// A simple search helper binary for the milli project.
@ -35,7 +39,18 @@ pub struct Opt {
print_facet_distribution: bool, print_facet_distribution: bool,
} }
pub fn run(opt: Opt) -> anyhow::Result<()> { fn main() -> Result<(), ()> {
let opt = Opt::from_args();
match run(opt) {
Ok(()) => Ok(()),
Err(e) => {
eprintln!("{}", e);
Err(())
},
}
}
fn run(opt: Opt) -> anyhow::Result<()> {
stderrlog::new() stderrlog::new()
.verbosity(opt.verbose) .verbosity(opt.verbose)
.show_level(false) .show_level(false)

View File

@ -1,22 +0,0 @@
use structopt::StructOpt;
use milli::subcommand::infos::{self, Opt as InfosOpt};
use milli::subcommand::search::{self, Opt as SearchOpt};
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
#[derive(Debug, StructOpt)]
#[structopt(name = "milli", about = "The milli project.")]
enum Command {
Infos(InfosOpt),
Search(SearchOpt),
}
fn main() -> anyhow::Result<()> {
match Command::from_args() {
Command::Infos(opt) => infos::run(opt),
Command::Search(opt) => search::run(opt),
}
}

View File

@ -1,2 +0,0 @@
pub mod infos;
pub mod search;