Remove unused infos/ http-ui/ and fuzz/ crates

This commit is contained in:
Loïc Lecrenier 2022-09-13 10:46:56 +02:00
parent 4fc6331cb6
commit add96f921b
23 changed files with 2 additions and 3637 deletions

View File

@ -45,19 +45,6 @@ We recommend using the `--release` flag to test the full performance.
cargo test
```
### Querying the engine via the web interface
To help you develop your feature you might need to use a web interface! You can query the engine by going to [the HTML page itself](http://127.0.0.1:9700).
### Compile and run the HTTP debug server
You can specify the number of threads to use to index documents and many other settings too.
```bash
cd http-ui
cargo run --release -- --db my-database.mdb -vvv --indexing-jobs 8
```
### Index your documents
It can index a massive number of documents in very little time. I have already managed to index:

View File

@ -1,6 +1,6 @@
[workspace]
resolver = "2"
members = ["milli", "filter-parser", "flatten-serde-json", "json-depth-checker", "http-ui", "benchmarks", "infos", "helpers", "cli"]
members = ["milli", "filter-parser", "flatten-serde-json", "json-depth-checker", "benchmarks", "helpers", "cli"]
default-members = ["milli"]
[profile.dev]

View File

@ -1,5 +1,5 @@
<p align="center">
<img alt="the milli logo" src="http-ui/public/logo-black.svg">
<img alt="the milli logo" src="logo-black.svg">
</p>
<p align="center">a concurrent indexer combined with fast and relevant search algorithms</p>
@ -19,8 +19,6 @@ This repository contains crates to quickly debug the engine:
- The `filter-parser` crate contains the parser for the Meilisearch filter syntax.
- The `flatten-serde-json` crate contains the library that flattens serde-json `Value` objects like Elasticsearch does.
- The `helpers` crate is only used to do operations on the database.
- The `http-ui` crate is a simple HTTP dashboard to test the features like for real!
- The `infos` crate is used to dump the internal data-structure and ensure correctness.
- The `json-depth-checker` crate is used to indicate if a JSON must be flattened.
## How to use it?

View File

@ -1,47 +0,0 @@
[package]
name = "http-ui"
description = "The HTTP user interface of the milli search engine"
version = "0.33.4"
authors = ["Clément Renault <clement@meilisearch.com>"]
edition = "2018"
publish = false
[dependencies]
anyhow = "1.0.56"
byte-unit = { version = "4.0.14", default-features = false, features = ["std"] }
crossbeam-channel = "0.5.2"
memmap2 = "0.5.3"
milli = { path = "../milli" }
mimalloc = { version = "0.1.29", default-features = false }
once_cell = "1.10.0"
rayon = "1.5.1"
structopt = { version = "0.3.26", default-features = false, features = ["wrap_help"] }
tempfile = "3.3.0"
# http server
askama = "0.11.1"
askama_warp = "0.12.0"
bytes = "1.1.0"
either = "1.6.1"
flate2 = "1.0.22"
futures = "0.3.21"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.79", features = ["preserve_order"] }
tokio = { version = "1.17.0", features = ["full"] }
tokio-stream = { version = "0.1.8", default-features = false, features = ["sync"] }
warp = "0.3.2"
# logging
fst = "0.4.7"
log = "0.4.17"
stderrlog = "0.5.1"
# Temporary fix for bitvec, remove once fixed. (https://github.com/bitvecto-rs/bitvec/issues/105)
bimap = "0.6.2"
csv = "1.1.6"
funty = "2.0.0"
[dev-dependencies]
maplit = "1.0.2"
serde_test = "1.0.136"

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,5 +0,0 @@
/*
2020 Jason Mulligan <jason.mulligan@avoidwork.com>
@version 6.1.0
*/
"use strict";!function(e){var x=/^(b|B)$/,M={iec:{bits:["b","Kib","Mib","Gib","Tib","Pib","Eib","Zib","Yib"],bytes:["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB","YiB"]},jedec:{bits:["b","Kb","Mb","Gb","Tb","Pb","Eb","Zb","Yb"],bytes:["B","KB","MB","GB","TB","PB","EB","ZB","YB"]}},w={iec:["","kibi","mebi","gibi","tebi","pebi","exbi","zebi","yobi"],jedec:["","kilo","mega","giga","tera","peta","exa","zetta","yotta"]};function t(e){var i,t,o,n,b,r,a,l,s,d,u,c,f,p,B,y=1<arguments.length&&void 0!==arguments[1]?arguments[1]:{},g=[],v=0,m=void 0,h=void 0;if(isNaN(e))throw new TypeError("Invalid number");return t=!0===y.bits,u=!0===y.unix,i=y.base||2,d=void 0!==y.round?y.round:u?1:2,r=void 0!==y.locale?y.locale:"",a=y.localeOptions||{},c=void 0!==y.separator?y.separator:"",f=void 0!==y.spacer?y.spacer:u?"":" ",B=y.symbols||{},p=2===i&&y.standard||"jedec",s=y.output||"string",n=!0===y.fullform,b=y.fullforms instanceof Array?y.fullforms:[],m=void 0!==y.exponent?y.exponent:-1,o=2<i?1e3:1024,(l=(h=Number(e))<0)&&(h=-h),(-1===m||isNaN(m))&&(m=Math.floor(Math.log(h)/Math.log(o)))<0&&(m=0),8<m&&(m=8),"exponent"===s?m:(0===h?(g[0]=0,g[1]=u?"":M[p][t?"bits":"bytes"][m]):(v=h/(2===i?Math.pow(2,10*m):Math.pow(1e3,m)),t&&o<=(v*=8)&&m<8&&(v/=o,m++),g[0]=Number(v.toFixed(0<m?d:0)),g[0]===o&&m<8&&void 0===y.exponent&&(g[0]=1,m++),g[1]=10===i&&1===m?t?"kb":"kB":M[p][t?"bits":"bytes"][m],u&&(g[1]="jedec"===p?g[1].charAt(0):0<m?g[1].replace(/B$/,""):g[1],x.test(g[1])&&(g[0]=Math.floor(g[0]),g[1]=""))),l&&(g[0]=-g[0]),g[1]=B[g[1]]||g[1],!0===r?g[0]=g[0].toLocaleString():0<r.length?g[0]=g[0].toLocaleString(r,a):0<c.length&&(g[0]=g[0].toString().replace(".",c)),"array"===s?g:(n&&(g[1]=b[m]?b[m]:w[p][m]+(t?"bit":"byte")+(1===g[0]?"":"s")),"object"===s?{value:g[0],symbol:g[1],exponent:m}:g.join(f)))}t.partial=function(i){return function(e){return t(e,i)}},"undefined"!=typeof exports?module.exports=t:"function"==typeof define&&void 0!==define.amd?define(function(){return 
t}):e.filesize=t}("undefined"!=typeof window?window:global);

File diff suppressed because one or more lines are too long

View File

@ -1,6 +0,0 @@
<svg width="277" height="236" viewBox="0 0 277 236" fill="none" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" clip-rule="evenodd" d="M213.085 190L242.907 86H276.196L246.375 190H213.085Z" fill="#494949"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M0 190L29.8215 86H63.1111L33.2896 190H0Z" fill="#494949"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M124.986 0L57.5772 235.083L60.7752 236H90.6038L158.276 0H124.986Z" fill="#494949"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M195.273 0L127.601 236H160.891L228.563 0H195.273Z" fill="#494949"/>
</svg>

Before

Width:  |  Height:  |  Size: 585 B

View File

@ -1,6 +0,0 @@
<svg width="277" height="236" viewBox="0 0 277 236" fill="none" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" clip-rule="evenodd" d="M213.508 190L243.33 86H276.619L246.798 190H213.508Z" fill="#B5B5B5"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M0.422791 190L30.2443 86H63.5339L33.7124 190H0.422791Z" fill="#B5B5B5"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M125.409 0L58 235.083L61.198 236H91.0266L158.699 0H125.409Z" fill="#B5B5B5"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M195.696 0L128.024 236H161.313L228.985 0H195.696Z" fill="#B5B5B5"/>
</svg>

Before

Width:  |  Height:  |  Size: 592 B

View File

@ -1,154 +0,0 @@
// Handle of the last search request sent with `$.ajax`, `null` until the first one is sent.
var request = null;
// Identifier of the last debounce timer created with `setTimeout`, `null` until one is created.
var timeoutID = null;
// Whether the facet distribution is requested with each search; toggled by the `#display_facets` button.
var display_facets = false;
// Debounced search: every time the query or the filters input changes we wait
// `timeoutMs` milliseconds, POST the search to the `query` endpoint and then
// render the facet distribution and the matching documents.
//
// NOTE(review): `results`, `facets`, `count` and `time` are not declared in
// this script; presumably they resolve to the page elements carrying those
// ids -- confirm against the template.
$('#query, #filters').on('input', function () {
  var query = $('#query').val();
  var filters = $('#filters').val();
  var timeoutMs = 100;

  // Restart the debounce timer on every input event.
  if (timeoutID !== null) {
    window.clearTimeout(timeoutID);
  }

  timeoutID = window.setTimeout(function () {
    request = $.ajax({
      type: "POST",
      url: "query",
      // Declared only once: the object literal used to repeat this key.
      contentType: 'application/json',
      data: JSON.stringify({
        'query': query,
        'filters': filters,
        "facetDistribution": display_facets,
      }),
      success: function (data, textStatus, request) {
        results.innerHTML = '';
        facets.innerHTML = '';

        let timeSpent = request.getResponseHeader('Time-Ms');
        count.innerHTML = data.numberOfCandidates.toLocaleString();
        time.innerHTML = `${timeSpent}ms`;
        time.classList.remove('fade-in-out');

        // One clickable tag per facet value. The loop variables are declared
        // locally so that they no longer leak as implicit globals.
        for (const facet_name in data.facets) {
          for (const value in data.facets[facet_name]) {
            const elem = document.createElement('span');
            const facetCount = data.facets[facet_name][value];
            elem.classList.add("tag");
            elem.setAttribute('data-name', facet_name);
            elem.setAttribute('data-value', value);
            // Facet names and values come from the indexed documents: insert
            // them as text so that they cannot be interpreted as markup.
            elem.textContent = `${facet_name}:${value} (${facetCount})`;
            facets.appendChild(elem);
          }
        }

        // One `<li class="document">` per hit, with one attribute/content row per field.
        for (const element of data.documents) {
          const elem = document.createElement('li');
          elem.classList.add("document");

          const ol = document.createElement('ol');

          for (const prop in element) {
            const field = document.createElement('li');
            field.classList.add("field");

            const attribute = document.createElement('div');
            attribute.classList.add("attribute");
            attribute.textContent = prop;

            const content = document.createElement('div');
            content.classList.add("content");

            // Stringify Objects and Arrays to avoid [Object object].
            // NOTE(review): the value is still inserted as HTML, presumably
            // because the server returns highlighted markup -- confirm before
            // switching this to `textContent`.
            if (typeof element[prop] === 'object' && element[prop] !== null) {
              content.innerHTML = JSON.stringify(element[prop]);
            } else {
              content.innerHTML = element[prop];
            }

            field.appendChild(attribute);
            field.appendChild(content);
            ol.appendChild(field);
          }

          elem.appendChild(ol);
          results.appendChild(elem);
        }

        // When we click on a tag we append the facet value
        // at the end of the facet query.
        $('#facets .tag').on('click', function () {
          let name = $(this).attr("data-name");
          let value = $(this).attr("data-value");

          let facet_query = $('#filters').val().trim();
          if (facet_query === "") {
            $('#filters').val(`${name} = "${value}"`).trigger('input');
          } else {
            $('#filters').val(`${facet_query} AND ${name} = "${value}"`).trigger('input');
          }
        });
      },
      beforeSend: function () {
        // Here the outer `request` still holds the previous request: abort it
        // and show the fading animation while the new one is running.
        if (request !== null) {
          request.abort();
          time.classList.add('fade-in-out');
        }
      },
    });
  }, timeoutMs);
});
// Returns the symmetric difference of the two arrays: the values of `arr1`
// that are not in `arr2` followed by the values of `arr2` that are not in
// `arr1`. Membership is tested with `Array.prototype.includes`.
function diffArray(arr1, arr2) {
  return arr1.concat(arr2).filter(function (val) {
    // Return a real boolean: returning `val` itself made `filter` drop the
    // falsy members (0, "", null, false...) of the difference.
    return !(arr1.includes(val) && arr2.includes(val));
  });
}
// Converts an object mapping facet names to iterables of values into an
// array holding, for each facet name, the array of its `name:value` strings.
function selectedFacetsToArray(facets_obj) {
  const groups = [];
  for (const name in facets_obj) {
    const labels = [...facets_obj[name]].map((facetValue) => `${name}:${facetValue}`);
    groups.push(labels);
  }
  return groups;
}
// Toggles the facets panel: flips the `display_facets` flag, then updates the
// label and the colour classes of the button and shows or hides `#facets`.
$('#display_facets').click(function () {
  const button = $('#display_facets');
  const panel = $('#facets');

  display_facets = !display_facets;

  if (display_facets) {
    button.html("Hide facets");
    button.addClass("is-danger");
    button.removeClass("is-success");
    panel.show();
  } else {
    button.html("Display facets");
    button.removeClass("is-danger");
    button.addClass("is-success");
    panel.hide();
  }
});
// Make the number of documents a little bit prettier: the raw count rendered
// in `#docs-count` is replaced by its locale-formatted version.
$('#docs-count').text(function (index, text) {
  // Always parse as base 10 instead of relying on the default radix detection.
  return parseInt(text, 10).toLocaleString();
});

// Make the database size a little bit easier to read: the raw number rendered
// in `#db-size` is passed to `filesize`.
$('#db-size').text(function (index, text) {
  return filesize(parseInt(text, 10));
});
// Once the window is loaded, fire a synthetic `input` event on the query box
// so that a placeholder search runs while the input is still empty.
$(window).on('load', () => {
  $('#query').trigger('input');
});

View File

@ -1,144 +0,0 @@
/* List of results: centered and capped at 900px wide. */
#results {
max-width: 900px;
margin: 20px auto 0 auto;
padding: 0;
}
/* Spacing between the facet tags. */
#facets .tag {
margin-right: 1em;
margin-bottom: 1em;
}
/* Facets panel: hidden by default, centered, scrollable once taller than 16em. */
#facets {
display: none;
max-width: 900px;
margin: 20px auto 0 auto;
padding: 0;
max-height: 16em;
overflow: scroll;
}
/* Element with the `display_facets` id. */
#display_facets {
margin: 20px auto 0 auto;
padding: 5px;
max-height: 16em;
overflow: scroll;
}
/* Facet tags show a pointer cursor on hover. */
#facets .tag:hover {
cursor: pointer;
}
/* Logo variants: the black one is displayed by default, the two are swapped
   in the dark color scheme media query at the end of this file. */
#logo-white {
display: none;
}
#logo-black {
display: inherit;
}
.notification {
display: flex;
justify-content: center;
}
/* One card per document. */
.document {
padding: 20px 20px;
background-color: #f5f5f5;
border-radius: 4px;
margin-bottom: 20px;
display: flex;
}
/* The list of fields takes 75% of the card, the image the remaining 25%. */
.document ol {
flex: 0 0 75%;
max-width: 75%;
padding: 0;
margin: 0;
}
.document .image {
max-width: 25%;
flex: 0 0 25%;
padding-left: 30px;
box-sizing: border-box;
}
.document .image img {
width: 100%;
}
/* One row per field, without list bullet. */
.field {
list-style-type: none;
display: flex;
flex-wrap: wrap;
}
.field:not(:last-child) {
margin-bottom: 7px;
}
/* Attribute name: 35% wide, right-aligned, uppercased and dimmed. */
.attribute {
flex: 0 0 35%;
max-width: 35%;
text-align: right;
padding-right: 10px;
box-sizing: border-box;
text-transform: uppercase;
opacity: 0.7;
}
/* Field content: the remaining 65% of the row. */
.content {
max-width: 65%;
flex: 0 0 65%;
box-sizing: border-box;
padding-left: 10px;
color: rgba(0,0,0,.9);
}
/* Colors of the `<mark>` elements found inside a content cell. */
.content mark {
background-color: hsl(204, 86%, 88%);
color: hsl(204, 86%, 25%);
}
/* Pulsing opacity animation applied through the `fade-in-out` class. */
@keyframes fadeInOut {
0% { opacity: 1; }
30% { opacity: 0.3; }
100% { opacity: 1; }
}
.fade-in-out {
animation: fadeInOut ease 1s infinite;
}
/* Overrides applied when the user prefers a dark color scheme. */
@media (prefers-color-scheme:dark) {
/* Swap the logos: show the white one, hide the black one. */
#logo-white {
display: inherit;
}
#logo-black {
display: none;
}
.hero.is-light {
background-color: #242424;
color: inherit;
}
.hero.is-light .title {
color: inherit;
}
.document {
background-color: #242424;
}
.content {
color: #dbdbdb;
}
.content mark {
background-color: hsl(0, 0%, 35%);
color: hsl(0,0%,90.2%);
}
}

View File

@ -1,102 +0,0 @@
// Once the window is loaded, open a websocket on `/updates/ws` and keep the
// list of updates in sync with the status messages received from it.
//
// NOTE(review): `results` is not declared in this script; presumably it
// resolves to the page element carrying that id -- confirm against the template.
$(window).on('load', function () {
  // `location.protocol` includes the trailing colon ("https:"): comparing it
  // with 'https' never matched, so the secure scheme was never selected.
  const wsProtocol = window.location.protocol === 'https:' ? 'wss' : 'ws';
  // `location.host` is the hostname plus the port only when there is one, which
  // avoids building a `host:` authority with an empty port.
  const url = wsProtocol + '://' + window.location.host + '/updates/ws';
  const socket = new WebSocket(url);

  socket.onmessage = function (event) {
    const status = JSON.parse(event.data);

    // A new update was registered: prepend its card to the list.
    if (status.type === 'Pending') {
      const elem = document.createElement('li');
      elem.classList.add("document");
      elem.setAttribute("id", 'update-' + status.update_id);

      const ol = document.createElement('ol');
      const field = document.createElement('li');
      field.classList.add("field");

      const attributeUpdateId = document.createElement('div');
      attributeUpdateId.classList.add("attribute");
      attributeUpdateId.textContent = "update id";

      const contentUpdateId = document.createElement('div');
      contentUpdateId.classList.add("updateId");
      contentUpdateId.classList.add("content");
      contentUpdateId.textContent = status.update_id;

      field.appendChild(attributeUpdateId);
      field.appendChild(contentUpdateId);

      const attributeUpdateStatus = document.createElement('div');
      attributeUpdateStatus.classList.add("attribute");
      attributeUpdateStatus.textContent = "update status";

      const contentUpdateStatus = document.createElement('div');
      contentUpdateStatus.classList.add("updateStatus");
      contentUpdateStatus.classList.add("content");
      contentUpdateStatus.textContent = 'pending';

      field.appendChild(attributeUpdateStatus);
      field.appendChild(contentUpdateStatus);

      ol.appendChild(field);
      elem.appendChild(ol);

      prependChild(results, elem);
    }

    // The update is being processed: show a progress bar in its status cell.
    if (status.type === "Progressing") {
      const id = 'update-' + status.update_id;
      const content = $(`#${id} .updateStatus.content`);

      let html;

      let { type, step, total_steps, current, total } = status.meta;

      if (type === 'DocumentsAddition') {
        // If the total is null or undefined the progress is not a finite number.
        let progress = Math.round(current / total * 100);
        // We must divide the progress by the total number of indexing steps.
        progress = progress / total_steps;
        // And mark the previous steps as processed.
        progress = progress + (step * 100 / total_steps);
        // Generate the appropriate html bulma progress bar.
        html = `<progress class="progress" title="${progress}%" value="${progress}" max="100"></progress>`;
      } else {
        html = `<progress class="progress" max="100"></progress>`;
      }

      content.html(html);
    }

    // Final states: the serialized meta is inserted as text, not as markup,
    // since it is data coming from the server.
    if (status.type === "Processed") {
      const id = 'update-' + status.update_id;
      const content = $(`#${id} .updateStatus.content`);
      content.text('processed ' + JSON.stringify(status.meta));
    }

    if (status.type === "Aborted") {
      const id = 'update-' + status.update_id;
      const content = $(`#${id} .updateStatus.content`);
      content.text('aborted ' + JSON.stringify(status.meta));
    }
  }
});
// Inserts `newFirstChild` in `parent` right before its current first child.
function prependChild(parent, newFirstChild) {
  const currentFirst = parent.firstChild;
  parent.insertBefore(newFirstChild, currentFirst);
}
// Make the number of documents a little bit prettier: the raw count rendered
// in `#docs-count` is replaced by its locale-formatted version.
$('#docs-count').text(function (index, text) {
  // Always parse as base 10 instead of relying on the default radix detection.
  return parseInt(text, 10).toLocaleString();
});

// Make the database size a little bit easier to read: the raw number rendered
// in `#db-size` is passed to `filesize`.
$('#db-size').text(function (index, text) {
  return filesize(parseInt(text, 10));
});

File diff suppressed because it is too large Load Diff

View File

@ -1,362 +0,0 @@
#![allow(unused)]
use std::path::Path;
use std::sync::Arc;
use crossbeam_channel::Sender;
use heed::types::{ByteSlice, DecodeIgnore, OwnedType, SerdeJson};
use heed::{Database, Env, EnvOpenOptions};
use milli::heed;
use serde::{Deserialize, Serialize};
pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
/// A store of updates backed by a heed environment.
///
/// `M` is the type of the metadata attached to an update when it is registered
/// (also kept for aborted updates) and `N` the type of the metadata written
/// once an update has been processed. All the databases are keyed by the
/// big-endian `u64` update id.
#[derive(Clone)]
pub struct UpdateStore<M, N> {
/// The environment in which the databases below live.
env: Env,
/// Metadata of the updates that are waiting to be processed.
pending_meta: Database<OwnedType<BEU64>, SerdeJson<M>>,
/// Raw content of the updates that are waiting to be processed.
pending: Database<OwnedType<BEU64>, ByteSlice>,
/// Metadata produced for the updates that have been processed.
processed_meta: Database<OwnedType<BEU64>, SerdeJson<N>>,
/// Metadata of the updates that have been aborted.
aborted_meta: Database<OwnedType<BEU64>, SerdeJson<M>>,
/// Channel on which a unit is sent whenever an update is registered.
notification_sender: Sender<()>,
}
/// Something able to process a pending update.
pub trait UpdateHandler<M, N> {
/// Processes the update identified by `update_id`, given the metadata it was
/// registered with and its raw content, and returns the metadata to store
/// for the processed update.
fn handle_update(&mut self, update_id: u64, meta: M, content: &[u8]) -> heed::Result<N>;
}
/// Every sendable `'static` closure taking the update id, the metadata and the
/// content of an update is an [`UpdateHandler`]: handling an update simply
/// calls the closure.
impl<M, N, F> UpdateHandler<M, N> for F
where
    F: FnMut(u64, M, &[u8]) -> heed::Result<N> + Send + 'static,
{
    fn handle_update(&mut self, update_id: u64, meta: M, content: &[u8]) -> heed::Result<N> {
        (*self)(update_id, meta, content)
    }
}
impl<M: 'static, N: 'static> UpdateStore<M, N> {
/// Opens the environment at `path`, creates the four update databases in it
/// and spawns the thread that processes the pending updates by calling
/// `update_handler` on them, one at a time.
///
/// # Errors
///
/// Returns an error when the environment cannot be opened or when one of the
/// databases cannot be created.
pub fn open<P, U>(
    mut options: EnvOpenOptions,
    path: P,
    mut update_handler: U,
) -> heed::Result<Arc<UpdateStore<M, N>>>
where
    P: AsRef<Path>,
    U: UpdateHandler<M, N> + Send + 'static,
    M: for<'a> Deserialize<'a>,
    N: Serialize,
{
    // One named database per store: pending-meta, pending, processed-meta, aborted-meta.
    options.max_dbs(4);
    let env = options.open(path)?;
    let pending_meta = env.create_database(Some("pending-meta"))?;
    let pending = env.create_database(Some("pending"))?;
    let processed_meta = env.create_database(Some("processed-meta"))?;
    let aborted_meta = env.create_database(Some("aborted-meta"))?;

    let (notification_sender, notification_receiver) = crossbeam_channel::bounded(1);
    // Send a first notification to trigger the process.
    let _ = notification_sender.send(());

    let update_store = Arc::new(UpdateStore {
        env,
        pending,
        pending_meta,
        processed_meta,
        aborted_meta,
        notification_sender,
    });

    // The processing thread owns a clone of the `Arc`.
    let update_store_cloned = update_store.clone();
    std::thread::spawn(move || {
        // Block and wait for something to process.
        for () in notification_receiver {
            // Drain the pending updates until none is left.
            loop {
                match update_store_cloned.process_pending_update(&mut update_handler) {
                    Ok(Some(_)) => (),
                    Ok(None) => break,
                    Err(e) => {
                        eprintln!("error while processing update: {}", e);
                        // The failed update is still the first pending one:
                        // retrying right away would spin forever on a
                        // persistent error, so wait for the next notification.
                        break;
                    }
                }
            }
        }
    });

    Ok(update_store)
}
/// Computes the id to give to the next registered update: one more than the
/// biggest id found in the pending, processed and aborted meta stores, or `0`
/// when the three of them are empty.
fn new_update_id(&self, txn: &heed::RoTxn) -> heed::Result<u64> {
    // Only the keys matter here, the metadata is never decoded.
    let pending_max =
        self.pending_meta.remap_data_type::<DecodeIgnore>().last(txn)?.map(|(key, _)| key.get());
    let processed_max =
        self.processed_meta.remap_data_type::<DecodeIgnore>().last(txn)?.map(|(key, _)| key.get());
    let aborted_max =
        self.aborted_meta.remap_data_type::<DecodeIgnore>().last(txn)?.map(|(key, _)| key.get());

    let biggest_id = pending_max.into_iter().chain(processed_max).chain(aborted_max).max();

    Ok(biggest_id.map_or(0, |id| id + 1))
}
/// Stores `content` in the pending store and `meta` in the pending-meta store
/// under a freshly allocated update id, notifies the processing thread and
/// returns that id.
///
/// # Panics
///
/// Panics when the notification channel is disconnected.
pub fn register_update(&self, meta: &M, content: &[u8]) -> heed::Result<u64>
where
    M: Serialize,
{
    let mut wtxn = self.env.write_txn()?;

    // The id is picked while the write transaction is open and registered
    // before it is committed: another registration has to wait for its own
    // write transaction, hence cannot pick the same id.
    let update_id = self.new_update_id(&wtxn)?;
    let key = BEU64::new(update_id);

    self.pending_meta.put(&mut wtxn, &key, meta)?;
    self.pending.put(&mut wtxn, &key, content)?;
    wtxn.commit()?;

    // A failed `try_send` is tolerated unless the channel is disconnected.
    match self.notification_sender.try_send(()) {
        Err(e) if e.is_disconnected() => panic!("update notification channel is disconnected"),
        _ => (),
    }

    Ok(update_id)
}
/// Runs `handler` on the pending update with the lowest id, if there is one.
///
/// The handler runs while only a read transaction is open; the write
/// transaction that moves the update from the pending stores to the
/// processed-meta store is opened *after* the handler has returned.
/// Returns the id and the new metadata of the processed update, or `None`
/// when no update is pending.
fn process_pending_update<U>(&self, handler: &mut U) -> heed::Result<Option<(u64, N)>>
where
    U: UpdateHandler<M, N>,
    M: for<'a> Deserialize<'a>,
    N: Serialize,
{
    // The read transaction gives access to the pending updates in id order.
    let rtxn = self.env.read_txn()?;

    let (update_key, meta) = match self.pending_meta.first(&rtxn)? {
        Some(entry) => entry,
        None => return Ok(None),
    };

    let content = self.pending.get(&rtxn, &update_key)?.expect("associated update content");

    // Hand the update over to the user provided handler, then release the
    // reader before asking for a writer.
    let processed_meta = handler.handle_update(update_key.get(), meta, content)?;
    drop(rtxn);

    // The update was successfully processed: remove it from the pending
    // stores and record the *new* meta in the processed-meta store.
    let mut wtxn = self.env.write_txn()?;
    self.pending_meta.delete(&mut wtxn, &update_key)?;
    self.pending.delete(&mut wtxn, &update_key)?;
    self.processed_meta.put(&mut wtxn, &update_key, &processed_meta)?;
    wtxn.commit()?;

    Ok(Some((update_key.get(), processed_meta)))
}
/// Returns the id and the metadata of the first entry of the pending-meta
/// store, which is the update considered as being processed, or `None` when
/// that store is empty.
pub fn processing_update(&self) -> heed::Result<Option<(u64, M)>>
where
    M: for<'a> Deserialize<'a>,
{
    let rtxn = self.env.read_txn()?;
    let first_pending = self.pending_meta.first(&rtxn)?;
    Ok(first_pending.map(|(key, meta)| (key.get(), meta)))
}
/// Calls `f` with one iterator per meta store, all reading from the same read
/// transaction, and returns what `f` returns. The iterators are given in this
/// order: the *processed* metas, the *aborted* metas and the *pending* metas.
pub fn iter_metas<F, T>(&self, mut f: F) -> heed::Result<T>
where
    M: for<'a> Deserialize<'a>,
    N: for<'a> Deserialize<'a>,
    F: for<'a> FnMut(
        heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<N>>,
        heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<M>>,
        heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<M>>,
    ) -> heed::Result<T>,
{
    let rtxn = self.env.read_txn()?;

    // The three iterators are created in argument order, then handed over to
    // the user defined function.
    f(
        self.processed_meta.iter(&rtxn)?,
        self.aborted_meta.iter(&rtxn)?,
        self.pending_meta.iter(&rtxn)?,
    )
}
/// Looks `update_id` up in the pending, then processed, then aborted meta
/// stores and returns the first metadata found, wrapped in the matching
/// [`UpdateStatusMeta`] variant, or `None` when no store knows this id.
pub fn meta(&self, update_id: u64) -> heed::Result<Option<UpdateStatusMeta<M, N>>>
where
    M: for<'a> Deserialize<'a>,
    N: for<'a> Deserialize<'a>,
{
    let rtxn = self.env.read_txn()?;
    let key = BEU64::new(update_id);

    // Each store is only queried when the previous ones did not know the id.
    let status = if let Some(meta) = self.pending_meta.get(&rtxn, &key)? {
        Some(UpdateStatusMeta::Pending(meta))
    } else if let Some(meta) = self.processed_meta.get(&rtxn, &key)? {
        Some(UpdateStatusMeta::Processed(meta))
    } else {
        self.aborted_meta.get(&rtxn, &key)?.map(UpdateStatusMeta::Aborted)
    };

    Ok(status)
}
/// Aborts a pending update: its content is deleted and its metadata is moved
/// into the aborted updates database. Returns the metadata of the aborted
/// update.
///
/// Returns `None` without changing anything when the update is the first
/// pending one (it is considered as being processed) or when it is not in the
/// pending store at all, which is the case of an update that has already been
/// processed or that does not exist.
pub fn abort_update(&self, update_id: u64) -> heed::Result<Option<M>>
where
    M: Serialize + for<'a> Deserialize<'a>,
{
    let mut wtxn = self.env.write_txn()?;

    // The first pending update is the one being processed: it cannot be aborted.
    let processing_id = self.pending_meta.first(&wtxn)?.map(|(key, _)| key.get());
    if processing_id == Some(update_id) {
        return Ok(None);
    }

    let key = BEU64::new(update_id);
    if let Some(meta) = self.pending_meta.get(&wtxn, &key)? {
        self.aborted_meta.put(&mut wtxn, &key, &meta)?;
        self.pending_meta.delete(&mut wtxn, &key)?;
        self.pending.delete(&mut wtxn, &key)?;
        wtxn.commit()?;
        Ok(Some(meta))
    } else {
        Ok(None)
    }
}
/// Aborts every pending update except the first one, which is considered as
/// being processed: their content is deleted and their metadata is moved into
/// the aborted updates database. Returns the ids and the metadata of the
/// updates that were aborted.
pub fn abort_pendings(&self) -> heed::Result<Vec<(u64, M)>>
where
    M: Serialize + for<'a> Deserialize<'a>,
{
    let mut wtxn = self.env.write_txn()?;

    // First collect the updates to abort (skipping the one being processed),
    // the stores are only modified once the iteration is over.
    let aborted_updates = self
        .pending_meta
        .iter(&wtxn)?
        .skip(1)
        .map(|entry| entry.map(|(key, meta)| (key.get(), meta)))
        .collect::<heed::Result<Vec<_>>>()?;

    for (id, meta) in &aborted_updates {
        let key = BEU64::new(*id);
        self.aborted_meta.put(&mut wtxn, &key, meta)?;
        self.pending_meta.delete(&mut wtxn, &key)?;
        self.pending.delete(&mut wtxn, &key)?;
    }

    wtxn.commit()?;

    Ok(aborted_updates)
}
}
/// The metadata of an update together with the store it was found in.
#[derive(Debug, PartialEq, Eq, Hash)]
pub enum UpdateStatusMeta<M, N> {
/// The update is in the pending-meta store, with the metadata it was registered with.
Pending(M),
/// The update is in the processed-meta store, with the metadata written after processing.
Processed(N),
/// The update is in the aborted-meta store, with the metadata it was registered with.
Aborted(M),
}
#[cfg(test)]
mod tests {
    use std::thread;
    use std::time::{Duration, Instant};

    use super::*;

    /// A registered update is eventually processed by the handler and its new
    /// metadata can be read back from the store.
    #[test]
    fn simple() {
        let dir = tempfile::tempdir().unwrap();
        let options = EnvOpenOptions::new();
        // Only the path is given to the store: moving `dir` itself into `open`
        // dropped the `TempDir`, hence deleted the directory, while the
        // environment was still in use.
        let update_store =
            UpdateStore::open(options, dir.path(), |_id, meta: String, _content: &_| {
                Ok(meta + " processed")
            })
            .unwrap();

        let meta = String::from("kiki");
        let update_id = update_store.register_update(&meta, &[]).unwrap();

        // Leave some time to the processing thread.
        thread::sleep(Duration::from_millis(100));

        let meta = update_store.meta(update_id).unwrap().unwrap();
        assert_eq!(meta, UpdateStatusMeta::Processed(String::from("kiki processed")));
    }

    /// Registering updates does not wait for the slow handler, and all of them
    /// end up processed.
    #[test]
    #[ignore]
    fn long_running_update() {
        let dir = tempfile::tempdir().unwrap();
        let options = EnvOpenOptions::new();
        // See `simple` for why the path is borrowed.
        let update_store =
            UpdateStore::open(options, dir.path(), |_id, meta: String, _content: &_| {
                thread::sleep(Duration::from_millis(400));
                Ok(meta + " processed")
            })
            .unwrap();

        let before_register = Instant::now();

        let meta = String::from("kiki");
        let update_id_kiki = update_store.register_update(&meta, &[]).unwrap();
        assert!(before_register.elapsed() < Duration::from_millis(200));

        let meta = String::from("coco");
        let update_id_coco = update_store.register_update(&meta, &[]).unwrap();
        assert!(before_register.elapsed() < Duration::from_millis(200));

        let meta = String::from("cucu");
        let update_id_cucu = update_store.register_update(&meta, &[]).unwrap();
        assert!(before_register.elapsed() < Duration::from_millis(200));

        // Three updates of 400ms each, plus some margin.
        thread::sleep(Duration::from_millis(400 * 3 + 100));

        let meta = update_store.meta(update_id_kiki).unwrap().unwrap();
        assert_eq!(meta, UpdateStatusMeta::Processed(String::from("kiki processed")));

        let meta = update_store.meta(update_id_coco).unwrap().unwrap();
        assert_eq!(meta, UpdateStatusMeta::Processed(String::from("coco processed")));

        let meta = update_store.meta(update_id_cucu).unwrap().unwrap();
        assert_eq!(meta, UpdateStatusMeta::Processed(String::from("cucu processed")));
    }
}

View File

@ -1,102 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="/bulma.min.css">
<link rel="stylesheet" href="/bulma-prefers-dark.min.css">
<link rel="stylesheet" href="/style.css">
<script type="text/javascript" src="/jquery-3.4.1.min.js"></script>
<script type="text/javascript" src="/filesize.min.js"></script>
<title>{{ db_name }} | The milli engine</title>
</head>
<body>
<section class="hero is-light">
<div class="hero-body">
<div class="container">
<div class="columns is-flex is-centered mb-6">
<figure class="image is-128x128">
<img id="logo-white" src="logo-white.svg" alt="milli logo in white">
<img id="logo-black" src="logo-black.svg" alt="milli logo in black">
</figure>
</div>
<nav class="level">
<div class="level-item has-text-centered">
<div>
<p class="heading">Database Name</p>
<p class="title">{{ db_name }}</p>
</div>
</div>
<div class="level-item has-text-centered">
<div>
<p class="heading">Database Size</p>
<p class="title" id="db-size">{{ db_size }}</p>
</div>
</div>
<div class="level-item has-text-centered">
<div>
<p class="heading">Number of Documents</p>
<p class="title" id="docs-count">{{ docs_count }}</p>
</div>
<button id="display_facets" class="button is-success">display facets</button>
</div>
</nav>
</div>
</div>
</section>
<section class="hero container">
<div class="notification" style="border-radius: 0 0 4px 4px;">
<nav class="level">
<!-- Left side -->
<div class="level-left">
<div class="level-item">
<div class="field has-addons has-addons-right">
<input id="query" class="input" type="text" autofocus placeholder="e.g. George Clooney">
<input id="filters" class="input" type="text" placeholder="filters like released >= 1577836800">
</div>
</div>
<div class="level-item"></div>
</div>
<!-- Right side -->
<nav class="level-right">
<div class="level-item has-text-centered">
<div>
<p class="heading">Candidates</p>
<p id="count" class="title">0</p>
</div>
</div>
<div class="level-item has-text-centered">
</div>
<div class="level-item has-text-centered">
<div>
<p class="heading">Time Spent</p>
<p id="time" class="title">0ms</p>
</div>
</div>
</nav>
</nav>
</div>
</section>
<section id="facets">
<!-- facet values -->
</section>
<section>
<ol id="results" class="content">
<!-- documents matching requests -->
</ol>
</section>
</body>
<script type="text/javascript" src="/script.js"></script>
</html>

View File

@ -1,95 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="/bulma.min.css">
<link rel="stylesheet" href="/bulma-prefers-dark.min.css">
<link rel="stylesheet" href="/style.css">
<script type="text/javascript" src="/jquery-3.4.1.min.js"></script>
<script type="text/javascript" src="/filesize.min.js"></script>
<title>{{ db_name }} | Updates</title>
</head>
<body>
<section class="hero is-light">
<div class="hero-body">
<div class="container">
<a href="/">
<div class="columns is-flex is-centered mb-6">
<figure class="image is-128x128">
<img id="logo-white" src="logo-white.svg" alt="milli logo in white">
<img id="logo-black" src="logo-black.svg" alt="milli logo in black">
</figure>
</div>
</a>
<nav class="level">
<div class="level-item has-text-centered">
<div>
<p class="heading">Database Name</p>
<p class="title">{{ db_name }}</p>
</div>
</div>
<div class="level-item has-text-centered">
<div>
<p class="heading">Database Size</p>
<p class="title" id="db-size">{{ db_size }}</p>
</div>
</div>
<div class="level-item has-text-centered">
<div>
<p class="heading">Number of Documents</p>
<p class="title" id="docs-count">{{ docs_count }}</p>
</div>
</div>
</nav>
</div>
</div>
</section>
<section>
<ol id="results" class="content">
{% for update in updates %}
{% match update %}
{% when UpdateStatus::Pending with { update_id, meta } %}
<li id="update-{{ update_id }}" class="document">
<ol>
<li class="field">
<div class="attribute">update id</div><div class="updateId content">{{ update_id }}</div>
<div class="attribute">update status</div><div class="updateStatus content">pending</div>
</li>
</ol>
</li>
{% when UpdateStatus::Processed with { update_id, meta } %}
<li id="update-{{ update_id }}" class="document">
<ol>
<li class="field">
<div class="attribute">update id</div><div class="updateId content">{{ update_id }}</div>
<div class="attribute">update status</div><div class="updateStatus content">{{ meta }}</div>
</li>
</ol>
</li>
{% when UpdateStatus::Aborted with { update_id, meta } %}
<li id="update-{{ update_id }}" class="document">
<ol>
<li class="field">
<div class="attribute">update id</div><div class="updateId content">{{ update_id }}</div>
<div class="attribute">update status</div><div class="updateStatus content">aborted</div>
</li>
</ol>
</li>
{% else %}
{% endmatch %}
{% endfor %}
</ol>
</section>
</body>
<script type="text/javascript" src="/updates-script.js"></script>
</html>

View File

@ -1,17 +0,0 @@
[package]
name = "infos"
version = "0.33.4"
authors = ["Clément Renault <clement@meilisearch.com>"]
edition = "2018"
publish = false
[dependencies]
anyhow = "1.0.56"
byte-unit = { version = "4.0.14", default-features = false, features = ["std"] }
csv = "1.1.6"
milli = { path = "../milli" }
mimalloc = { version = "0.1.29", default-features = false }
roaring = "0.9.0"
serde_json = "1.0.79"
stderrlog = "0.5.1"
structopt = { version = "0.3.26", default-features = false }

File diff suppressed because it is too large Load Diff

View File

@ -1,26 +0,0 @@
# Milli
## Fuzzing milli
Currently you can only fuzz the indexation.
To execute the fuzzer run:
```
cargo +nightly fuzz run indexing
```
To execute the fuzzer on multiple threads you can also run:
```
cargo +nightly fuzz run -j4 indexing
```
Since the fuzzer is going to create a lot of temporary files to let milli index its documents,
I would also recommend executing it on a ramdisk.
Here is how to set up a ramdisk on Linux:
```
sudo mount -t tmpfs none path/to/your/ramdisk
```
And then set the [TMPDIR](https://doc.rust-lang.org/std/env/fn.temp_dir.html) environment variable
to make the fuzzer create its files in it:
```
export TMPDIR=path/to/your/ramdisk
```

View File

@ -1,5 +0,0 @@
Cargo.lock
target/
/corpus/
/artifacts/

View File

@ -1,34 +0,0 @@
[package]
name = "milli-fuzz"
version = "0.0.0"
authors = ["Automatically generated"]
publish = false
edition = "2018"
[package.metadata]
cargo-fuzz = true
[dependencies]
arbitrary = "1.0"
libfuzzer-sys = "0.4"
serde_json = { version = "1.0.62", features = ["preserve_order"] }
anyhow = "1.0"
tempfile = "3.3"
arbitrary-json = "0.1.0"
mimalloc = { version = "0.1.29", default-features = false }
[dependencies.milli]
path = ".."
# Prevent this from interfering with workspaces
[workspace]
members = ["."]
[profile.release]
debug = true
[[bin]]
name = "indexing"
path = "fuzz_targets/indexing.rs"
test = false
doc = false

View File

@ -1,114 +0,0 @@
#![no_main]
use std::collections::HashSet;
use std::io::{BufWriter, Cursor, Read, Seek, Write};
use anyhow::{bail, Result};
use arbitrary_json::ArbitraryValue;
use libfuzzer_sys::fuzz_target;
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::heed::EnvOpenOptions;
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{Index, Object};
use serde_json::{Map, Value};
// Register mimalloc as the global allocator for this fuzz target binary.
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
/// Reads a JSON array of objects from `input` and writes it to `writer` as an obkv
/// documents batch.
///
/// Returns the number of documents that were appended to the batch.
///
/// # Errors
///
/// Returns an error when `input` cannot be deserialized into a list of JSON objects,
/// when that list is empty (`"Empty payload"`), or when appending a document or
/// finalizing the batch fails.
pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
    let writer = BufWriter::new(writer);
    let mut builder = DocumentsBatchBuilder::new(writer);

    let values: Vec<Object> = serde_json::from_reader(input)?;
    // The emptiness check has to look at the parsed payload: nothing has been appended
    // to the builder yet, so asking the builder for its document count at this point
    // would report zero and reject every payload, even valid non-empty ones.
    if values.is_empty() {
        bail!("Empty payload");
    }

    for object in values {
        builder.append_json_object(&object)?;
    }

    let count = builder.documents_count();
    // Finalize the batch. The returned writer is not needed by the caller; it is kept
    // alive until the end of the function and released on drop.
    let _writer = builder.into_inner()?;

    Ok(count as usize)
}
/// Indexes one batch of documents into `index` inside a single write transaction.
///
/// # Errors
///
/// Propagates failures from opening the write transaction, creating the indexer,
/// adding the documents, and committing the transaction.
///
/// # Panics
///
/// Panics if `IndexDocuments::execute` returns an error.
fn index_documents(
    index: &mut milli::Index,
    documents: DocumentsBatchReader<Cursor<Vec<u8>>>,
) -> Result<()> {
    let indexer_config = IndexerConfig::default();
    let mut txn = index.write_txn()?;
    let documents_config = IndexDocumentsConfig::default();

    let mut indexer =
        IndexDocuments::new(&mut txn, &index, &indexer_config, documents_config, |_| ())?;
    indexer.add_documents(documents)?;
    // NOTE(review): unlike the surrounding calls, a failure here panics instead of being
    // returned. Presumably this is deliberate so that it shows up as a fuzzer crash;
    // confirm before changing it.
    indexer.execute().unwrap();

    Ok(txn.commit()?)
}
/// Creates a new index in a temporary directory and applies the settings used by the
/// fuzz target: displayed, searchable, filterable and sortable fields plus a distinct
/// field.
///
/// # Errors
///
/// Returns an error if the index itself cannot be created.
///
/// # Panics
///
/// Panics if the temporary directory cannot be created, if the write transaction cannot
/// be opened or committed, or if applying the settings fails.
fn create_index() -> Result<milli::Index> {
    // NOTE(review): `tmp_dir` is dropped when this function returns, which removes the
    // directory while the returned index is still in use. Confirm this is acceptable on
    // the platforms the fuzzer runs on.
    let tmp_dir = tempfile::tempdir().unwrap();

    let mut env_options = EnvOpenOptions::new();
    env_options
        .map_size(10 * 1024 * 1024 * 1024) // 10 GB
        .max_readers(1);
    let index = Index::new(env_options, tmp_dir.path())?;

    let indexer_config = IndexerConfig::default();
    let mut txn = index.write_txn().unwrap();
    let mut settings = Settings::new(&mut txn, &index, &indexer_config);

    let displayed =
        ["id", "title", "album", "artist", "genre", "country", "released", "duration"]
            .iter()
            .copied()
            .map(String::from)
            .collect();
    settings.set_displayed_fields(displayed);

    let searchable = ["title", "album", "artist"].iter().copied().map(String::from).collect();
    settings.set_searchable_fields(searchable);

    // The same set of fields is used both for filtering and for sorting.
    let facets: HashSet<String> =
        ["released-timestamp", "duration-float", "genre", "country", "artist"]
            .iter()
            .copied()
            .map(String::from)
            .collect();
    settings.set_filterable_fields(facets.clone());
    settings.set_sortable_fields(facets);

    settings.set_distinct_field("same".to_string());

    settings.execute(|_| ()).unwrap();
    txn.commit().unwrap();

    Ok(index)
}
// Fuzz entry point: every inner `Vec<ArbitraryValue>` is serialized as one JSON array,
// converted into a documents batch and indexed into a freshly created index.
fuzz_target!(|batches: Vec<Vec<ArbitraryValue>>| {
    if let Ok(mut index) = create_index() {
        for batch in batches {
            let values: Vec<Value> = batch.into_iter().map(Value::from).collect();
            let payload = serde_json::to_string(&Value::Array(values)).unwrap();

            let mut buffer = Cursor::new(Vec::new());
            // Payloads that cannot be turned into a documents batch are skipped.
            if read_json(payload.as_bytes(), &mut buffer).is_ok() {
                buffer.rewind().unwrap();
                let reader = DocumentsBatchReader::from_reader(buffer).unwrap();
                // milli can return many errors and we cannot tell which ones are
                // expected, so the result is discarded: only unexpected panics are of
                // interest here.
                let _ = index_documents(&mut index, reader);
            }
        }

        index.prepare_for_closing().wait();
    }
});