mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 00:34:26 +01:00
Merge pull request #81 from meilisearch/smart-workspace
Change the project to become a workspace
This commit is contained in:
commit
5d0ac3e3e6
3
.gitignore
vendored
3
.gitignore
vendored
@ -2,9 +2,6 @@
|
|||||||
/target
|
/target
|
||||||
/Cargo.lock
|
/Cargo.lock
|
||||||
|
|
||||||
# the sub target folder
|
|
||||||
http-ui/target
|
|
||||||
|
|
||||||
# datasets
|
# datasets
|
||||||
*.csv
|
*.csv
|
||||||
*.mmdb
|
*.mmdb
|
||||||
|
1210
Cargo.lock
generated
1210
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
70
Cargo.toml
70
Cargo.toml
@ -1,70 +1,6 @@
|
|||||||
[package]
|
[workspace]
|
||||||
name = "milli"
|
members = ["milli", "http-ui", "infos", "search"]
|
||||||
version = "0.1.0"
|
default-members = ["milli"]
|
||||||
authors = ["Kerollmops <clement@meilisearch.com>"]
|
|
||||||
edition = "2018"
|
|
||||||
|
|
||||||
[dependencies]
|
|
||||||
anyhow = "1.0.28"
|
|
||||||
bstr = "0.2.13"
|
|
||||||
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
|
|
||||||
byteorder = "1.3.4"
|
|
||||||
crossbeam-channel = "0.5.0"
|
|
||||||
csv = "1.1.3"
|
|
||||||
either = "1.6.1"
|
|
||||||
flate2 = "1.0.17"
|
|
||||||
fst = "0.4.5"
|
|
||||||
fxhash = "0.2.1"
|
|
||||||
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
|
|
||||||
heed = { version = "0.10.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
|
|
||||||
human_format = "1.0.3"
|
|
||||||
jemallocator = "0.3.2"
|
|
||||||
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
|
||||||
linked-hash-map = "0.5.3"
|
|
||||||
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" }
|
|
||||||
memmap = "0.7.0"
|
|
||||||
near-proximity = { git = "https://github.com/Kerollmops/plane-sweep-proximity", rev = "6608205" }
|
|
||||||
num-traits = "0.2.14"
|
|
||||||
obkv = "0.1.0"
|
|
||||||
once_cell = "1.4.0"
|
|
||||||
ordered-float = "2.0.0"
|
|
||||||
rayon = "1.3.1"
|
|
||||||
regex = "1.4.2"
|
|
||||||
ringtail = "0.3.0"
|
|
||||||
roaring = "0.6.4"
|
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
|
||||||
serde_json = { version = "1.0.59", features = ["preserve_order"] }
|
|
||||||
slice-group-by = "0.2.6"
|
|
||||||
smallstr = { version = "0.2.0", features = ["serde"] }
|
|
||||||
smallvec = "1.4.0"
|
|
||||||
structopt = { version = "0.3.14", default-features = false, features = ["wrap_help"] }
|
|
||||||
tempfile = "3.1.0"
|
|
||||||
uuid = { version = "0.8.1", features = ["v4"] }
|
|
||||||
|
|
||||||
# facet filter parser
|
|
||||||
pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" }
|
|
||||||
pest_derive = "2.1.0"
|
|
||||||
|
|
||||||
# documents words self-join
|
|
||||||
itertools = "0.9.0"
|
|
||||||
|
|
||||||
# logging
|
|
||||||
log = "0.4.11"
|
|
||||||
stderrlog = "0.5.0"
|
|
||||||
|
|
||||||
[dev-dependencies]
|
|
||||||
criterion = "0.3.3"
|
|
||||||
maplit = "1.0.2"
|
|
||||||
|
|
||||||
[build-dependencies]
|
|
||||||
fst = "0.4.5"
|
|
||||||
|
|
||||||
[features]
|
|
||||||
default = []
|
|
||||||
|
|
||||||
[[bench]]
|
|
||||||
name = "search"
|
|
||||||
harness = false
|
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
debug = true
|
debug = true
|
||||||
|
2530
http-ui/Cargo.lock
generated
2530
http-ui/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -12,7 +12,7 @@ grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
|
|||||||
heed = "0.10.5"
|
heed = "0.10.5"
|
||||||
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" }
|
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" }
|
||||||
memmap = "0.7.0"
|
memmap = "0.7.0"
|
||||||
milli = { path = ".." }
|
milli = { path = "../milli" }
|
||||||
once_cell = "1.4.1"
|
once_cell = "1.4.1"
|
||||||
rayon = "1.5.0"
|
rayon = "1.5.0"
|
||||||
structopt = { version = "0.3.14", default-features = false, features = ["wrap_help"] }
|
structopt = { version = "0.3.14", default-features = false, features = ["wrap_help"] }
|
||||||
@ -34,3 +34,6 @@ warp = "0.2.2"
|
|||||||
log = "0.4.11"
|
log = "0.4.11"
|
||||||
stderrlog = "0.5.0"
|
stderrlog = "0.5.0"
|
||||||
fst = "0.4.5"
|
fst = "0.4.5"
|
||||||
|
|
||||||
|
# Temporary fix for bitvec, remove once fixed. (https://github.com/bitvecto-rs/bitvec/issues/105)
|
||||||
|
funty = "=1.1.0"
|
||||||
|
17
infos/Cargo.toml
Normal file
17
infos/Cargo.toml
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
[package]
|
||||||
|
name = "infos"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = ["Clément Renault <clement@meilisearch.com>"]
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0.28"
|
||||||
|
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
|
||||||
|
csv = "1.1.3"
|
||||||
|
heed = "0.10.5"
|
||||||
|
jemallocator = "0.3.2"
|
||||||
|
milli = { path = "../milli" }
|
||||||
|
roaring = "0.6.4"
|
||||||
|
serde_json = "1.0.59"
|
||||||
|
stderrlog = "0.5.0"
|
||||||
|
structopt = { version = "0.3.14", default-features = false }
|
@ -4,12 +4,16 @@ use std::{str, io, fmt};
|
|||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use byte_unit::Byte;
|
use byte_unit::Byte;
|
||||||
use crate::Index;
|
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
|
use milli::Index;
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
|
|
||||||
use Command::*;
|
use Command::*;
|
||||||
|
|
||||||
|
#[cfg(target_os = "linux")]
|
||||||
|
#[global_allocator]
|
||||||
|
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||||
|
|
||||||
const MAIN_DB_NAME: &str = "main";
|
const MAIN_DB_NAME: &str = "main";
|
||||||
const WORD_DOCIDS_DB_NAME: &str = "word-docids";
|
const WORD_DOCIDS_DB_NAME: &str = "word-docids";
|
||||||
const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions";
|
const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions";
|
||||||
@ -153,7 +157,18 @@ enum Command {
|
|||||||
PatchToNewExternalIds,
|
PatchToNewExternalIds,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn run(opt: Opt) -> anyhow::Result<()> {
|
fn main() -> Result<(), ()> {
|
||||||
|
let opt = Opt::from_args();
|
||||||
|
match run(opt) {
|
||||||
|
Ok(()) => Ok(()),
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("{}", e);
|
||||||
|
Err(())
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(opt: Opt) -> anyhow::Result<()> {
|
||||||
stderrlog::new()
|
stderrlog::new()
|
||||||
.verbosity(opt.verbose)
|
.verbosity(opt.verbose)
|
||||||
.show_level(false)
|
.show_level(false)
|
||||||
@ -204,7 +219,7 @@ fn patch_to_new_external_ids(index: &Index, wtxn: &mut heed::RwTxn) -> anyhow::R
|
|||||||
let documents_ids = documents_ids.to_owned();
|
let documents_ids = documents_ids.to_owned();
|
||||||
index.main.put::<_, ByteSlice, ByteSlice>(
|
index.main.put::<_, ByteSlice, ByteSlice>(
|
||||||
wtxn,
|
wtxn,
|
||||||
crate::index::HARD_EXTERNAL_DOCUMENTS_IDS_KEY.as_bytes(),
|
milli::index::HARD_EXTERNAL_DOCUMENTS_IDS_KEY.as_bytes(),
|
||||||
&documents_ids,
|
&documents_ids,
|
||||||
)?;
|
)?;
|
||||||
index.main.delete::<_, ByteSlice>(wtxn, USERS_IDS_DOCUMENTS_IDS)?;
|
index.main.delete::<_, ByteSlice>(wtxn, USERS_IDS_DOCUMENTS_IDS)?;
|
||||||
@ -242,7 +257,7 @@ fn facet_values_iter<'txn, DC: 'txn, T>(
|
|||||||
rtxn: &'txn heed::RoTxn,
|
rtxn: &'txn heed::RoTxn,
|
||||||
db: heed::Database<heed::types::ByteSlice, DC>,
|
db: heed::Database<heed::types::ByteSlice, DC>,
|
||||||
field_id: u8,
|
field_id: u8,
|
||||||
facet_type: crate::facet::FacetType,
|
facet_type: milli::facet::FacetType,
|
||||||
string_fn: impl Fn(&str) -> T + 'txn,
|
string_fn: impl Fn(&str) -> T + 'txn,
|
||||||
float_fn: impl Fn(u8, f64, f64) -> T + 'txn,
|
float_fn: impl Fn(u8, f64, f64) -> T + 'txn,
|
||||||
integer_fn: impl Fn(u8, i64, i64) -> T + 'txn,
|
integer_fn: impl Fn(u8, i64, i64) -> T + 'txn,
|
||||||
@ -250,8 +265,8 @@ fn facet_values_iter<'txn, DC: 'txn, T>(
|
|||||||
where
|
where
|
||||||
DC: heed::BytesDecode<'txn>,
|
DC: heed::BytesDecode<'txn>,
|
||||||
{
|
{
|
||||||
use crate::facet::FacetType;
|
use milli::facet::FacetType;
|
||||||
use crate::heed_codec::facet::{
|
use milli::heed_codec::facet::{
|
||||||
FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec,
|
FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -504,7 +519,7 @@ fn export_words_fst(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
|||||||
|
|
||||||
fn export_documents(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
fn export_documents(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
||||||
use std::io::{BufWriter, Write as _};
|
use std::io::{BufWriter, Write as _};
|
||||||
use crate::obkv_to_json;
|
use milli::obkv_to_json;
|
||||||
|
|
||||||
let stdout = io::stdout();
|
let stdout = io::stdout();
|
||||||
let mut out = BufWriter::new(stdout);
|
let mut out = BufWriter::new(stdout);
|
||||||
@ -548,7 +563,7 @@ fn total_docid_word_positions_size(index: &Index, rtxn: &heed::RoTxn) -> anyhow:
|
|||||||
|
|
||||||
fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
||||||
use heed::types::DecodeIgnore;
|
use heed::types::DecodeIgnore;
|
||||||
use crate::{DocumentId, BEU32StrCodec};
|
use milli::{DocumentId, BEU32StrCodec};
|
||||||
|
|
||||||
let mut words_counts = Vec::new();
|
let mut words_counts = Vec::new();
|
||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
@ -587,7 +602,7 @@ fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::
|
|||||||
|
|
||||||
fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
||||||
use heed::types::DecodeIgnore;
|
use heed::types::DecodeIgnore;
|
||||||
use crate::BoRoaringBitmapCodec;
|
use milli::BoRoaringBitmapCodec;
|
||||||
|
|
||||||
let mut values_length = Vec::new();
|
let mut values_length = Vec::new();
|
||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
@ -639,7 +654,7 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu
|
|||||||
use heed::types::ByteSlice;
|
use heed::types::ByteSlice;
|
||||||
use heed::{Error, BytesDecode};
|
use heed::{Error, BytesDecode};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
|
use milli::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
|
||||||
|
|
||||||
fn compute_stats<'a, DC: BytesDecode<'a, DItem = RoaringBitmap>>(
|
fn compute_stats<'a, DC: BytesDecode<'a, DItem = RoaringBitmap>>(
|
||||||
db: heed::PolyDatabase,
|
db: heed::PolyDatabase,
|
||||||
@ -720,7 +735,7 @@ fn word_pair_proximities_docids(
|
|||||||
) -> anyhow::Result<()>
|
) -> anyhow::Result<()>
|
||||||
{
|
{
|
||||||
use heed::types::ByteSlice;
|
use heed::types::ByteSlice;
|
||||||
use crate::RoaringBitmapCodec;
|
use milli::RoaringBitmapCodec;
|
||||||
|
|
||||||
let stdout = io::stdout();
|
let stdout = io::stdout();
|
||||||
let mut wtr = csv::Writer::from_writer(stdout.lock());
|
let mut wtr = csv::Writer::from_writer(stdout.lock());
|
60
milli/Cargo.toml
Normal file
60
milli/Cargo.toml
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
[package]
|
||||||
|
name = "milli"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = ["Kerollmops <clement@meilisearch.com>"]
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0.28"
|
||||||
|
bstr = "0.2.13"
|
||||||
|
byteorder = "1.3.4"
|
||||||
|
crossbeam-channel = "0.5.0"
|
||||||
|
csv = "1.1.3"
|
||||||
|
either = "1.6.1"
|
||||||
|
flate2 = "1.0.17"
|
||||||
|
fst = "0.4.5"
|
||||||
|
fxhash = "0.2.1"
|
||||||
|
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
|
||||||
|
heed = { version = "0.10.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
|
||||||
|
human_format = "1.0.3"
|
||||||
|
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
||||||
|
linked-hash-map = "0.5.3"
|
||||||
|
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" }
|
||||||
|
memmap = "0.7.0"
|
||||||
|
num-traits = "0.2.14"
|
||||||
|
obkv = "0.1.0"
|
||||||
|
once_cell = "1.4.0"
|
||||||
|
ordered-float = "2.0.0"
|
||||||
|
rayon = "1.3.1"
|
||||||
|
regex = "1.4.2"
|
||||||
|
roaring = "0.6.4"
|
||||||
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
|
serde_json = { version = "1.0.59", features = ["preserve_order"] }
|
||||||
|
smallstr = { version = "0.2.0", features = ["serde"] }
|
||||||
|
smallvec = "1.4.0"
|
||||||
|
tempfile = "3.1.0"
|
||||||
|
uuid = { version = "0.8.1", features = ["v4"] }
|
||||||
|
|
||||||
|
# facet filter parser
|
||||||
|
pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" }
|
||||||
|
pest_derive = "2.1.0"
|
||||||
|
|
||||||
|
# documents words self-join
|
||||||
|
itertools = "0.9.0"
|
||||||
|
|
||||||
|
# logging
|
||||||
|
log = "0.4.11"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
criterion = "0.3.3"
|
||||||
|
maplit = "1.0.2"
|
||||||
|
|
||||||
|
[build-dependencies]
|
||||||
|
fst = "0.4.5"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = []
|
||||||
|
|
||||||
|
[[bench]]
|
||||||
|
name = "search"
|
||||||
|
harness = false
|
@ -3,15 +3,14 @@
|
|||||||
mod criterion;
|
mod criterion;
|
||||||
mod external_documents_ids;
|
mod external_documents_ids;
|
||||||
mod fields_ids_map;
|
mod fields_ids_map;
|
||||||
mod index;
|
|
||||||
mod mdfs;
|
mod mdfs;
|
||||||
mod query_tokens;
|
mod query_tokens;
|
||||||
mod search;
|
mod search;
|
||||||
mod update_store;
|
mod update_store;
|
||||||
pub mod facet;
|
pub mod facet;
|
||||||
pub mod heed_codec;
|
pub mod heed_codec;
|
||||||
|
pub mod index;
|
||||||
pub mod proximity;
|
pub mod proximity;
|
||||||
pub mod subcommand;
|
|
||||||
pub mod update;
|
pub mod update;
|
||||||
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
16
search/Cargo.toml
Normal file
16
search/Cargo.toml
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
[package]
|
||||||
|
name = "search"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = ["Clément Renault <clement@meilisearch.com>"]
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0.28"
|
||||||
|
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
|
||||||
|
heed = "0.10.5"
|
||||||
|
jemallocator = "0.3.2"
|
||||||
|
log = "0.4.11"
|
||||||
|
milli = { path = "../milli" }
|
||||||
|
serde_json = "1.0.59"
|
||||||
|
stderrlog = "0.5.0"
|
||||||
|
structopt = { version = "0.3.14", default-features = false }
|
@ -8,7 +8,11 @@ use heed::EnvOpenOptions;
|
|||||||
use log::debug;
|
use log::debug;
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
|
|
||||||
use crate::{Index, obkv_to_json};
|
use milli::{Index, obkv_to_json};
|
||||||
|
|
||||||
|
#[cfg(target_os = "linux")]
|
||||||
|
#[global_allocator]
|
||||||
|
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||||
|
|
||||||
#[derive(Debug, StructOpt)]
|
#[derive(Debug, StructOpt)]
|
||||||
/// A simple search helper binary for the milli project.
|
/// A simple search helper binary for the milli project.
|
||||||
@ -35,7 +39,18 @@ pub struct Opt {
|
|||||||
print_facet_distribution: bool,
|
print_facet_distribution: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn run(opt: Opt) -> anyhow::Result<()> {
|
fn main() -> Result<(), ()> {
|
||||||
|
let opt = Opt::from_args();
|
||||||
|
match run(opt) {
|
||||||
|
Ok(()) => Ok(()),
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("{}", e);
|
||||||
|
Err(())
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(opt: Opt) -> anyhow::Result<()> {
|
||||||
stderrlog::new()
|
stderrlog::new()
|
||||||
.verbosity(opt.verbose)
|
.verbosity(opt.verbose)
|
||||||
.show_level(false)
|
.show_level(false)
|
22
src/main.rs
22
src/main.rs
@ -1,22 +0,0 @@
|
|||||||
use structopt::StructOpt;
|
|
||||||
|
|
||||||
use milli::subcommand::infos::{self, Opt as InfosOpt};
|
|
||||||
use milli::subcommand::search::{self, Opt as SearchOpt};
|
|
||||||
|
|
||||||
#[cfg(target_os = "linux")]
|
|
||||||
#[global_allocator]
|
|
||||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
|
||||||
|
|
||||||
#[derive(Debug, StructOpt)]
|
|
||||||
#[structopt(name = "milli", about = "The milli project.")]
|
|
||||||
enum Command {
|
|
||||||
Infos(InfosOpt),
|
|
||||||
Search(SearchOpt),
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() -> anyhow::Result<()> {
|
|
||||||
match Command::from_args() {
|
|
||||||
Command::Infos(opt) => infos::run(opt),
|
|
||||||
Command::Search(opt) => search::run(opt),
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,2 +0,0 @@
|
|||||||
pub mod infos;
|
|
||||||
pub mod search;
|
|
Loading…
Reference in New Issue
Block a user