mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-22 12:40:04 +01:00
Remove unused infos/ http-ui/ and fuzz/ crates
This commit is contained in:
parent
4fc6331cb6
commit
add96f921b
@ -45,19 +45,6 @@ We recommend using the `--release` flag to test the full performance.
|
||||
cargo test
|
||||
```
|
||||
|
||||
### Querying the engine via the web interface
|
||||
|
||||
To help you develop your feature you might need to use a web interface! You can query the engine by going to [the HTML page itself](http://127.0.0.1:9700).
|
||||
|
||||
### Compile and run the HTTP debug server
|
||||
|
||||
You can specify the number of threads to use to index documents and many other settings too.
|
||||
|
||||
```bash
|
||||
cd http-ui
|
||||
cargo run --release -- --db my-database.mdb -vvv --indexing-jobs 8
|
||||
```
|
||||
|
||||
### Index your documents
|
||||
|
||||
It can index a massive amount of documents in not much time, I already achieved to index:
|
||||
|
@ -1,6 +1,6 @@
|
||||
[workspace]
|
||||
resolver = "2"
|
||||
members = ["milli", "filter-parser", "flatten-serde-json", "json-depth-checker", "http-ui", "benchmarks", "infos", "helpers", "cli"]
|
||||
members = ["milli", "filter-parser", "flatten-serde-json", "json-depth-checker", "benchmarks", "helpers", "cli"]
|
||||
default-members = ["milli"]
|
||||
|
||||
[profile.dev]
|
||||
|
@ -1,5 +1,5 @@
|
||||
<p align="center">
|
||||
<img alt="the milli logo" src="http-ui/public/logo-black.svg">
|
||||
<img alt="the milli logo" src="logo-black.svg">
|
||||
</p>
|
||||
|
||||
<p align="center">a concurrent indexer combined with fast and relevant search algorithms</p>
|
||||
@ -19,8 +19,6 @@ This repository contains crates to quickly debug the engine:
|
||||
- The `filter-parser` crate contains the parser for the Meilisearch filter syntax.
|
||||
- The `flatten-serde-json` crate contains the library that flattens serde-json `Value` objects like Elasticsearch does.
|
||||
- The `helpers` crate is only used to do operations on the database.
|
||||
- The `http-ui` crate is a simple HTTP dashboard to test the features like for real!
|
||||
- The `infos` crate is used to dump the internal data-structure and ensure correctness.
|
||||
- The `json-depth-checker` crate is used to indicate if a JSON must be flattened.
|
||||
|
||||
## How to use it?
|
||||
|
@ -1,47 +0,0 @@
|
||||
[package]
|
||||
name = "http-ui"
|
||||
description = "The HTTP user interface of the milli search engine"
|
||||
version = "0.33.4"
|
||||
authors = ["Clément Renault <clement@meilisearch.com>"]
|
||||
edition = "2018"
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.56"
|
||||
byte-unit = { version = "4.0.14", default-features = false, features = ["std"] }
|
||||
crossbeam-channel = "0.5.2"
|
||||
memmap2 = "0.5.3"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.29", default-features = false }
|
||||
once_cell = "1.10.0"
|
||||
rayon = "1.5.1"
|
||||
structopt = { version = "0.3.26", default-features = false, features = ["wrap_help"] }
|
||||
tempfile = "3.3.0"
|
||||
|
||||
# http server
|
||||
askama = "0.11.1"
|
||||
askama_warp = "0.12.0"
|
||||
bytes = "1.1.0"
|
||||
either = "1.6.1"
|
||||
flate2 = "1.0.22"
|
||||
futures = "0.3.21"
|
||||
serde = { version = "1.0.136", features = ["derive"] }
|
||||
serde_json = { version = "1.0.79", features = ["preserve_order"] }
|
||||
tokio = { version = "1.17.0", features = ["full"] }
|
||||
tokio-stream = { version = "0.1.8", default-features = false, features = ["sync"] }
|
||||
warp = "0.3.2"
|
||||
|
||||
# logging
|
||||
fst = "0.4.7"
|
||||
log = "0.4.17"
|
||||
stderrlog = "0.5.1"
|
||||
|
||||
# Temporary fix for bitvec, remove once fixed. (https://github.com/bitvecto-rs/bitvec/issues/105)
|
||||
bimap = "0.6.2"
|
||||
csv = "1.1.6"
|
||||
funty = "2.0.0"
|
||||
|
||||
[dev-dependencies]
|
||||
maplit = "1.0.2"
|
||||
serde_test = "1.0.136"
|
||||
|
1
http-ui/public/bulma-prefers-dark.min.css
vendored
1
http-ui/public/bulma-prefers-dark.min.css
vendored
File diff suppressed because one or more lines are too long
1
http-ui/public/bulma.min.css
vendored
1
http-ui/public/bulma.min.css
vendored
File diff suppressed because one or more lines are too long
5
http-ui/public/filesize.min.js
vendored
5
http-ui/public/filesize.min.js
vendored
@ -1,5 +0,0 @@
|
||||
/*
|
||||
2020 Jason Mulligan <jason.mulligan@avoidwork.com>
|
||||
@version 6.1.0
|
||||
*/
|
||||
"use strict";!function(e){var x=/^(b|B)$/,M={iec:{bits:["b","Kib","Mib","Gib","Tib","Pib","Eib","Zib","Yib"],bytes:["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB","YiB"]},jedec:{bits:["b","Kb","Mb","Gb","Tb","Pb","Eb","Zb","Yb"],bytes:["B","KB","MB","GB","TB","PB","EB","ZB","YB"]}},w={iec:["","kibi","mebi","gibi","tebi","pebi","exbi","zebi","yobi"],jedec:["","kilo","mega","giga","tera","peta","exa","zetta","yotta"]};function t(e){var i,t,o,n,b,r,a,l,s,d,u,c,f,p,B,y=1<arguments.length&&void 0!==arguments[1]?arguments[1]:{},g=[],v=0,m=void 0,h=void 0;if(isNaN(e))throw new TypeError("Invalid number");return t=!0===y.bits,u=!0===y.unix,i=y.base||2,d=void 0!==y.round?y.round:u?1:2,r=void 0!==y.locale?y.locale:"",a=y.localeOptions||{},c=void 0!==y.separator?y.separator:"",f=void 0!==y.spacer?y.spacer:u?"":" ",B=y.symbols||{},p=2===i&&y.standard||"jedec",s=y.output||"string",n=!0===y.fullform,b=y.fullforms instanceof Array?y.fullforms:[],m=void 0!==y.exponent?y.exponent:-1,o=2<i?1e3:1024,(l=(h=Number(e))<0)&&(h=-h),(-1===m||isNaN(m))&&(m=Math.floor(Math.log(h)/Math.log(o)))<0&&(m=0),8<m&&(m=8),"exponent"===s?m:(0===h?(g[0]=0,g[1]=u?"":M[p][t?"bits":"bytes"][m]):(v=h/(2===i?Math.pow(2,10*m):Math.pow(1e3,m)),t&&o<=(v*=8)&&m<8&&(v/=o,m++),g[0]=Number(v.toFixed(0<m?d:0)),g[0]===o&&m<8&&void 0===y.exponent&&(g[0]=1,m++),g[1]=10===i&&1===m?t?"kb":"kB":M[p][t?"bits":"bytes"][m],u&&(g[1]="jedec"===p?g[1].charAt(0):0<m?g[1].replace(/B$/,""):g[1],x.test(g[1])&&(g[0]=Math.floor(g[0]),g[1]=""))),l&&(g[0]=-g[0]),g[1]=B[g[1]]||g[1],!0===r?g[0]=g[0].toLocaleString():0<r.length?g[0]=g[0].toLocaleString(r,a):0<c.length&&(g[0]=g[0].toString().replace(".",c)),"array"===s?g:(n&&(g[1]=b[m]?b[m]:w[p][m]+(t?"bit":"byte")+(1===g[0]?"":"s")),"object"===s?{value:g[0],symbol:g[1],exponent:m}:g.join(f)))}t.partial=function(i){return function(e){return t(e,i)}},"undefined"!=typeof exports?module.exports=t:"function"==typeof define&&void 0!==define.amd?define(function(){return t}):e.filesize=t}("undefined"!=typeof window?window:global);
|
2
http-ui/public/jquery-3.4.1.min.js
vendored
2
http-ui/public/jquery-3.4.1.min.js
vendored
File diff suppressed because one or more lines are too long
@ -1,6 +0,0 @@
|
||||
<svg width="277" height="236" viewBox="0 0 277 236" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M213.085 190L242.907 86H276.196L246.375 190H213.085Z" fill="#494949"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M0 190L29.8215 86H63.1111L33.2896 190H0Z" fill="#494949"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M124.986 0L57.5772 235.083L60.7752 236H90.6038L158.276 0H124.986Z" fill="#494949"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M195.273 0L127.601 236H160.891L228.563 0H195.273Z" fill="#494949"/>
|
||||
</svg>
|
Before Width: | Height: | Size: 585 B |
@ -1,6 +0,0 @@
|
||||
<svg width="277" height="236" viewBox="0 0 277 236" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M213.508 190L243.33 86H276.619L246.798 190H213.508Z" fill="#B5B5B5"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M0.422791 190L30.2443 86H63.5339L33.7124 190H0.422791Z" fill="#B5B5B5"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M125.409 0L58 235.083L61.198 236H91.0266L158.699 0H125.409Z" fill="#B5B5B5"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M195.696 0L128.024 236H161.313L228.985 0H195.696Z" fill="#B5B5B5"/>
|
||||
</svg>
|
Before Width: | Height: | Size: 592 B |
@ -1,154 +0,0 @@
|
||||
var request = null;
|
||||
var timeoutID = null;
|
||||
var display_facets = false;
|
||||
|
||||
$('#query, #filters').on('input', function () {
|
||||
var query = $('#query').val();
|
||||
var filters = $('#filters').val();
|
||||
var timeoutMs = 100;
|
||||
|
||||
if (timeoutID !== null) {
|
||||
window.clearTimeout(timeoutID);
|
||||
}
|
||||
|
||||
timeoutID = window.setTimeout(function () {
|
||||
request = $.ajax({
|
||||
type: "POST",
|
||||
url: "query",
|
||||
contentType: 'application/json',
|
||||
data: JSON.stringify({
|
||||
'query': query,
|
||||
'filters': filters,
|
||||
"facetDistribution": display_facets,
|
||||
}),
|
||||
contentType: 'application/json',
|
||||
success: function (data, textStatus, request) {
|
||||
results.innerHTML = '';
|
||||
facets.innerHTML = '';
|
||||
|
||||
let timeSpent = request.getResponseHeader('Time-Ms');
|
||||
let numberOfDocuments = data.documents.length;
|
||||
count.innerHTML = data.numberOfCandidates.toLocaleString();
|
||||
time.innerHTML = `${timeSpent}ms`;
|
||||
time.classList.remove('fade-in-out');
|
||||
|
||||
for (facet_name in data.facets) {
|
||||
for (value in data.facets[facet_name]) {
|
||||
const elem = document.createElement('span');
|
||||
const count = data.facets[facet_name][value];
|
||||
elem.classList.add("tag");
|
||||
elem.setAttribute('data-name', facet_name);
|
||||
elem.setAttribute('data-value', value);
|
||||
elem.innerHTML = `${facet_name}:${value} (${count})`;
|
||||
facets.appendChild(elem);
|
||||
}
|
||||
}
|
||||
|
||||
for (element of data.documents) {
|
||||
const elem = document.createElement('li');
|
||||
elem.classList.add("document");
|
||||
|
||||
const ol = document.createElement('ol');
|
||||
|
||||
for (const prop in element) {
|
||||
const field = document.createElement('li');
|
||||
field.classList.add("field");
|
||||
|
||||
const attribute = document.createElement('div');
|
||||
attribute.classList.add("attribute");
|
||||
attribute.innerHTML = prop;
|
||||
|
||||
const content = document.createElement('div');
|
||||
content.classList.add("content");
|
||||
|
||||
// Stringify Objects and Arrays to avoid [Object object]
|
||||
if (typeof element[prop] === 'object' && element[prop] !== null) {
|
||||
content.innerHTML = JSON.stringify(element[prop]);
|
||||
} else {
|
||||
content.innerHTML = element[prop];
|
||||
}
|
||||
|
||||
field.appendChild(attribute);
|
||||
field.appendChild(content);
|
||||
|
||||
ol.appendChild(field);
|
||||
}
|
||||
|
||||
elem.appendChild(ol);
|
||||
results.appendChild(elem);
|
||||
}
|
||||
|
||||
// When we click on a tag we append the facet value
|
||||
// at the end of the facet query.
|
||||
$('#facets .tag').on('click', function () {
|
||||
let name = $(this).attr("data-name");
|
||||
let value = $(this).attr("data-value");
|
||||
|
||||
let facet_query = $('#filters').val().trim();
|
||||
if (facet_query === "") {
|
||||
$('#filters').val(`${name} = "${value}"`).trigger('input');
|
||||
} else {
|
||||
$('#filters').val(`${facet_query} AND ${name} = "${value}"`).trigger('input');
|
||||
}
|
||||
});
|
||||
},
|
||||
beforeSend: function () {
|
||||
if (request !== null) {
|
||||
request.abort();
|
||||
time.classList.add('fade-in-out');
|
||||
}
|
||||
},
|
||||
});
|
||||
}, timeoutMs);
|
||||
});
|
||||
|
||||
function diffArray(arr1, arr2) {
|
||||
return arr1.concat(arr2).filter(function (val) {
|
||||
if (!(arr1.includes(val) && arr2.includes(val)))
|
||||
return val;
|
||||
});
|
||||
}
|
||||
|
||||
function selectedFacetsToArray(facets_obj) {
|
||||
var array = [];
|
||||
for (const facet_name in facets_obj) {
|
||||
var subarray = [];
|
||||
for (const facet_value of facets_obj[facet_name]) {
|
||||
subarray.push(`${facet_name}:${facet_value}`);
|
||||
}
|
||||
array.push(subarray);
|
||||
}
|
||||
return array;
|
||||
}
|
||||
|
||||
$('#display_facets').click(function() {
|
||||
if (display_facets) {
|
||||
display_facets = false;
|
||||
$('#display_facets').html("Display facets")
|
||||
$('#display_facets').removeClass("is-danger");
|
||||
$('#display_facets').addClass("is-success");
|
||||
$('#facets').hide();
|
||||
} else {
|
||||
display_facets = true;
|
||||
$('#display_facets').html("Hide facets")
|
||||
$('#display_facets').addClass("is-danger");
|
||||
$('#display_facets').removeClass("is-success");
|
||||
$('#facets').show();
|
||||
}
|
||||
});
|
||||
|
||||
// Make the number of document a little bit prettier
|
||||
$('#docs-count').text(function(index, text) {
|
||||
return parseInt(text).toLocaleString()
|
||||
});
|
||||
|
||||
// Make the database a little bit easier to read
|
||||
$('#db-size').text(function(index, text) {
|
||||
return filesize(parseInt(text))
|
||||
});
|
||||
|
||||
// We trigger the input when we load the script.
|
||||
$(window).on('load', function () {
|
||||
// We execute a placeholder search when the input is empty.
|
||||
$('#query').trigger('input');
|
||||
});
|
@ -1,144 +0,0 @@
|
||||
#results {
|
||||
max-width: 900px;
|
||||
margin: 20px auto 0 auto;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
#facets .tag {
|
||||
margin-right: 1em;
|
||||
margin-bottom: 1em;
|
||||
}
|
||||
|
||||
#facets {
|
||||
display: none;
|
||||
max-width: 900px;
|
||||
margin: 20px auto 0 auto;
|
||||
padding: 0;
|
||||
max-height: 16em;
|
||||
overflow: scroll;
|
||||
}
|
||||
|
||||
#display_facets {
|
||||
margin: 20px auto 0 auto;
|
||||
padding: 5px;
|
||||
max-height: 16em;
|
||||
overflow: scroll;
|
||||
}
|
||||
|
||||
#facets .tag:hover {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
#logo-white {
|
||||
display: none;
|
||||
}
|
||||
|
||||
#logo-black {
|
||||
display: inherit;
|
||||
}
|
||||
|
||||
.notification {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.document {
|
||||
padding: 20px 20px;
|
||||
background-color: #f5f5f5;
|
||||
border-radius: 4px;
|
||||
margin-bottom: 20px;
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.document ol {
|
||||
flex: 0 0 75%;
|
||||
max-width: 75%;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.document .image {
|
||||
max-width: 25%;
|
||||
flex: 0 0 25%;
|
||||
padding-left: 30px;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
.document .image img {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.field {
|
||||
list-style-type: none;
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.field:not(:last-child) {
|
||||
margin-bottom: 7px;
|
||||
}
|
||||
|
||||
.attribute {
|
||||
flex: 0 0 35%;
|
||||
max-width: 35%;
|
||||
text-align: right;
|
||||
padding-right: 10px;
|
||||
box-sizing: border-box;
|
||||
text-transform: uppercase;
|
||||
opacity: 0.7;
|
||||
}
|
||||
|
||||
.content {
|
||||
max-width: 65%;
|
||||
flex: 0 0 65%;
|
||||
box-sizing: border-box;
|
||||
padding-left: 10px;
|
||||
color: rgba(0,0,0,.9);
|
||||
}
|
||||
|
||||
.content mark {
|
||||
background-color: hsl(204, 86%, 88%);
|
||||
color: hsl(204, 86%, 25%);
|
||||
}
|
||||
|
||||
@keyframes fadeInOut {
|
||||
0% { opacity: 1; }
|
||||
30% { opacity: 0.3; }
|
||||
100% { opacity: 1; }
|
||||
}
|
||||
|
||||
.fade-in-out {
|
||||
animation: fadeInOut ease 1s infinite;
|
||||
}
|
||||
|
||||
@media (prefers-color-scheme:dark) {
|
||||
#logo-white {
|
||||
display: inherit;
|
||||
}
|
||||
|
||||
#logo-black {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.hero.is-light {
|
||||
background-color: #242424;
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
.hero.is-light .title {
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
.document {
|
||||
background-color: #242424;
|
||||
}
|
||||
|
||||
.content {
|
||||
color: #dbdbdb;
|
||||
}
|
||||
|
||||
.content mark {
|
||||
background-color: hsl(0, 0%, 35%);
|
||||
color: hsl(0,0%,90.2%);
|
||||
}
|
||||
}
|
@ -1,102 +0,0 @@
|
||||
$(window).on('load', function () {
|
||||
let wsProtcol = "ws";
|
||||
if (window.location.protocol === 'https') {
|
||||
wsProtcol = 'wss';
|
||||
}
|
||||
|
||||
let url = wsProtcol + '://' + window.location.hostname + ':' + window.location.port + '/updates/ws';
|
||||
var socket = new WebSocket(url);
|
||||
|
||||
socket.onmessage = function (event) {
|
||||
let status = JSON.parse(event.data);
|
||||
|
||||
if (status.type == 'Pending') {
|
||||
const elem = document.createElement('li');
|
||||
elem.classList.add("document");
|
||||
elem.setAttribute("id", 'update-' + status.update_id);
|
||||
|
||||
const ol = document.createElement('ol');
|
||||
const field = document.createElement('li');
|
||||
field.classList.add("field");
|
||||
|
||||
const attributeUpdateId = document.createElement('div');
|
||||
attributeUpdateId.classList.add("attribute");
|
||||
attributeUpdateId.innerHTML = "update id";
|
||||
|
||||
const contentUpdateId = document.createElement('div');
|
||||
contentUpdateId.classList.add("updateId");
|
||||
contentUpdateId.classList.add("content");
|
||||
contentUpdateId.innerHTML = status.update_id;
|
||||
|
||||
field.appendChild(attributeUpdateId);
|
||||
field.appendChild(contentUpdateId);
|
||||
|
||||
const attributeUpdateStatus = document.createElement('div');
|
||||
attributeUpdateStatus.classList.add("attribute");
|
||||
attributeUpdateStatus.innerHTML = "update status";
|
||||
|
||||
const contentUpdateStatus = document.createElement('div');
|
||||
contentUpdateStatus.classList.add("updateStatus");
|
||||
contentUpdateStatus.classList.add("content");
|
||||
contentUpdateStatus.innerHTML = 'pending';
|
||||
|
||||
field.appendChild(attributeUpdateStatus);
|
||||
field.appendChild(contentUpdateStatus);
|
||||
|
||||
ol.appendChild(field);
|
||||
elem.appendChild(ol);
|
||||
|
||||
prependChild(results, elem);
|
||||
}
|
||||
|
||||
if (status.type == "Progressing") {
|
||||
const id = 'update-' + status.update_id;
|
||||
const content = $(`#${id} .updateStatus.content`);
|
||||
|
||||
let html;
|
||||
|
||||
let { type, step, total_steps, current, total } = status.meta;
|
||||
|
||||
if (type === 'DocumentsAddition') {
|
||||
// If the total is null or undefined then the progress results is infinity.
|
||||
let progress = Math.round(current / total * 100);
|
||||
// We must divide the progress by the total number of indexing steps.
|
||||
progress = progress / total_steps;
|
||||
// And mark the previous steps as processed.
|
||||
progress = progress + (step * 100 / total_steps);
|
||||
// Generate the appropriate html bulma progress bar.
|
||||
html = `<progress class="progress" title="${progress}%" value="${progress}" max="100"></progress>`;
|
||||
} else {
|
||||
html = `<progress class="progress" max="100"></progress>`;
|
||||
}
|
||||
|
||||
content.html(html);
|
||||
}
|
||||
|
||||
if (status.type == "Processed") {
|
||||
const id = 'update-' + status.update_id;
|
||||
const content = $(`#${id} .updateStatus.content`);
|
||||
content.html('processed ' + JSON.stringify(status.meta));
|
||||
}
|
||||
|
||||
if (status.type == "Aborted") {
|
||||
const id = 'update-' + status.update_id;
|
||||
const content = $(`#${id} .updateStatus.content`);
|
||||
content.html('aborted ' + JSON.stringify(status.meta));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
function prependChild(parent, newFirstChild) {
|
||||
parent.insertBefore(newFirstChild, parent.firstChild)
|
||||
}
|
||||
|
||||
// Make the number of document a little bit prettier
|
||||
$('#docs-count').text(function(index, text) {
|
||||
return parseInt(text).toLocaleString()
|
||||
});
|
||||
|
||||
// Make the database a little bit easier to read
|
||||
$('#db-size').text(function(index, text) {
|
||||
return filesize(parseInt(text))
|
||||
});
|
1176
http-ui/src/main.rs
1176
http-ui/src/main.rs
File diff suppressed because it is too large
Load Diff
@ -1,362 +0,0 @@
|
||||
#![allow(unused)]
|
||||
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crossbeam_channel::Sender;
|
||||
use heed::types::{ByteSlice, DecodeIgnore, OwnedType, SerdeJson};
|
||||
use heed::{Database, Env, EnvOpenOptions};
|
||||
use milli::heed;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct UpdateStore<M, N> {
|
||||
env: Env,
|
||||
pending_meta: Database<OwnedType<BEU64>, SerdeJson<M>>,
|
||||
pending: Database<OwnedType<BEU64>, ByteSlice>,
|
||||
processed_meta: Database<OwnedType<BEU64>, SerdeJson<N>>,
|
||||
aborted_meta: Database<OwnedType<BEU64>, SerdeJson<M>>,
|
||||
notification_sender: Sender<()>,
|
||||
}
|
||||
|
||||
pub trait UpdateHandler<M, N> {
|
||||
fn handle_update(&mut self, update_id: u64, meta: M, content: &[u8]) -> heed::Result<N>;
|
||||
}
|
||||
|
||||
impl<M, N, F> UpdateHandler<M, N> for F
|
||||
where
|
||||
F: FnMut(u64, M, &[u8]) -> heed::Result<N> + Send + 'static,
|
||||
{
|
||||
fn handle_update(&mut self, update_id: u64, meta: M, content: &[u8]) -> heed::Result<N> {
|
||||
self(update_id, meta, content)
|
||||
}
|
||||
}
|
||||
|
||||
impl<M: 'static, N: 'static> UpdateStore<M, N> {
|
||||
pub fn open<P, U>(
|
||||
mut options: EnvOpenOptions,
|
||||
path: P,
|
||||
mut update_handler: U,
|
||||
) -> heed::Result<Arc<UpdateStore<M, N>>>
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
U: UpdateHandler<M, N> + Send + 'static,
|
||||
M: for<'a> Deserialize<'a>,
|
||||
N: Serialize,
|
||||
{
|
||||
options.max_dbs(4);
|
||||
let env = options.open(path)?;
|
||||
let pending_meta = env.create_database(Some("pending-meta"))?;
|
||||
let pending = env.create_database(Some("pending"))?;
|
||||
let processed_meta = env.create_database(Some("processed-meta"))?;
|
||||
let aborted_meta = env.create_database(Some("aborted-meta"))?;
|
||||
|
||||
let (notification_sender, notification_receiver) = crossbeam_channel::bounded(1);
|
||||
// Send a first notification to trigger the process.
|
||||
let _ = notification_sender.send(());
|
||||
|
||||
let update_store = Arc::new(UpdateStore {
|
||||
env,
|
||||
pending,
|
||||
pending_meta,
|
||||
processed_meta,
|
||||
aborted_meta,
|
||||
notification_sender,
|
||||
});
|
||||
|
||||
let update_store_cloned = update_store.clone();
|
||||
std::thread::spawn(move || {
|
||||
// Block and wait for something to process.
|
||||
for () in notification_receiver {
|
||||
loop {
|
||||
match update_store_cloned.process_pending_update(&mut update_handler) {
|
||||
Ok(Some(_)) => (),
|
||||
Ok(None) => break,
|
||||
Err(e) => eprintln!("error while processing update: {}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(update_store)
|
||||
}
|
||||
|
||||
/// Returns the new biggest id to use to store the new update.
|
||||
fn new_update_id(&self, txn: &heed::RoTxn) -> heed::Result<u64> {
|
||||
let last_pending =
|
||||
self.pending_meta.remap_data_type::<DecodeIgnore>().last(txn)?.map(|(k, _)| k.get());
|
||||
|
||||
let last_processed =
|
||||
self.processed_meta.remap_data_type::<DecodeIgnore>().last(txn)?.map(|(k, _)| k.get());
|
||||
|
||||
let last_aborted =
|
||||
self.aborted_meta.remap_data_type::<DecodeIgnore>().last(txn)?.map(|(k, _)| k.get());
|
||||
|
||||
let last_update_id =
|
||||
[last_pending, last_processed, last_aborted].iter().copied().flatten().max();
|
||||
|
||||
match last_update_id {
|
||||
Some(last_id) => Ok(last_id + 1),
|
||||
None => Ok(0),
|
||||
}
|
||||
}
|
||||
|
||||
/// Registers the update content in the pending store and the meta
|
||||
/// into the pending-meta store. Returns the new unique update id.
|
||||
pub fn register_update(&self, meta: &M, content: &[u8]) -> heed::Result<u64>
|
||||
where
|
||||
M: Serialize,
|
||||
{
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
|
||||
// We ask the update store to give us a new update id, this is safe,
|
||||
// no other update can have the same id because we use a write txn before
|
||||
// asking for the id and registering it so other update registering
|
||||
// will be forced to wait for a new write txn.
|
||||
let update_id = self.new_update_id(&wtxn)?;
|
||||
let update_key = BEU64::new(update_id);
|
||||
|
||||
self.pending_meta.put(&mut wtxn, &update_key, meta)?;
|
||||
self.pending.put(&mut wtxn, &update_key, content)?;
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
if let Err(e) = self.notification_sender.try_send(()) {
|
||||
assert!(!e.is_disconnected(), "update notification channel is disconnected");
|
||||
}
|
||||
|
||||
Ok(update_id)
|
||||
}
|
||||
|
||||
/// Executes the user provided function on the next pending update (the one with the lowest id).
|
||||
/// This is asynchronous as it let the user process the update with a read-only txn and
|
||||
/// only writing the result meta to the processed-meta store *after* it has been processed.
|
||||
fn process_pending_update<U>(&self, handler: &mut U) -> heed::Result<Option<(u64, N)>>
|
||||
where
|
||||
U: UpdateHandler<M, N>,
|
||||
M: for<'a> Deserialize<'a>,
|
||||
N: Serialize,
|
||||
{
|
||||
// Create a read transaction to be able to retrieve the pending update in order.
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let first_meta = self.pending_meta.first(&rtxn)?;
|
||||
|
||||
// If there is a pending update we process and only keep
|
||||
// a reader while processing it, not a writer.
|
||||
match first_meta {
|
||||
Some((first_id, first_meta)) => {
|
||||
let first_content =
|
||||
self.pending.get(&rtxn, &first_id)?.expect("associated update content");
|
||||
|
||||
// Process the pending update using the provided user function.
|
||||
let new_meta = handler.handle_update(first_id.get(), first_meta, first_content)?;
|
||||
drop(rtxn);
|
||||
|
||||
// Once the pending update have been successfully processed
|
||||
// we must remove the content from the pending stores and
|
||||
// write the *new* meta to the processed-meta store and commit.
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.pending_meta.delete(&mut wtxn, &first_id)?;
|
||||
self.pending.delete(&mut wtxn, &first_id)?;
|
||||
self.processed_meta.put(&mut wtxn, &first_id, &new_meta)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
Ok(Some((first_id.get(), new_meta)))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// The id and metadata of the update that is currently being processed,
|
||||
/// `None` if no update is being processed.
|
||||
pub fn processing_update(&self) -> heed::Result<Option<(u64, M)>>
|
||||
where
|
||||
M: for<'a> Deserialize<'a>,
|
||||
{
|
||||
let rtxn = self.env.read_txn()?;
|
||||
match self.pending_meta.first(&rtxn)? {
|
||||
Some((key, meta)) => Ok(Some((key.get(), meta))),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the user defined function with the meta-store iterators, the first
|
||||
/// iterator is the *processed* meta one, the second the *aborted* meta one
|
||||
/// and, the last is the *pending* meta one.
|
||||
pub fn iter_metas<F, T>(&self, mut f: F) -> heed::Result<T>
|
||||
where
|
||||
M: for<'a> Deserialize<'a>,
|
||||
N: for<'a> Deserialize<'a>,
|
||||
F: for<'a> FnMut(
|
||||
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<N>>,
|
||||
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<M>>,
|
||||
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<M>>,
|
||||
) -> heed::Result<T>,
|
||||
{
|
||||
let rtxn = self.env.read_txn()?;
|
||||
|
||||
// We get the pending, processed and aborted meta iterators.
|
||||
let processed_iter = self.processed_meta.iter(&rtxn)?;
|
||||
let aborted_iter = self.aborted_meta.iter(&rtxn)?;
|
||||
let pending_iter = self.pending_meta.iter(&rtxn)?;
|
||||
|
||||
// We execute the user defined function with both iterators.
|
||||
(f)(processed_iter, aborted_iter, pending_iter)
|
||||
}
|
||||
|
||||
/// Returns the update associated meta or `None` if the update doesn't exist.
|
||||
pub fn meta(&self, update_id: u64) -> heed::Result<Option<UpdateStatusMeta<M, N>>>
|
||||
where
|
||||
M: for<'a> Deserialize<'a>,
|
||||
N: for<'a> Deserialize<'a>,
|
||||
{
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let key = BEU64::new(update_id);
|
||||
|
||||
if let Some(meta) = self.pending_meta.get(&rtxn, &key)? {
|
||||
return Ok(Some(UpdateStatusMeta::Pending(meta)));
|
||||
}
|
||||
|
||||
if let Some(meta) = self.processed_meta.get(&rtxn, &key)? {
|
||||
return Ok(Some(UpdateStatusMeta::Processed(meta)));
|
||||
}
|
||||
|
||||
if let Some(meta) = self.aborted_meta.get(&rtxn, &key)? {
|
||||
return Ok(Some(UpdateStatusMeta::Aborted(meta)));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Aborts an update, an aborted update content is deleted and
|
||||
/// the meta of it is moved into the aborted updates database.
|
||||
///
|
||||
/// Trying to abort an update that is currently being processed, an update
|
||||
/// that as already been processed or which doesn't actually exist, will
|
||||
/// return `None`.
|
||||
pub fn abort_update(&self, update_id: u64) -> heed::Result<Option<M>>
|
||||
where
|
||||
M: Serialize + for<'a> Deserialize<'a>,
|
||||
{
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let key = BEU64::new(update_id);
|
||||
|
||||
// We cannot abort an update that is currently being processed.
|
||||
if self.pending_meta.first(&wtxn)?.map(|(key, _)| key.get()) == Some(update_id) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let meta = match self.pending_meta.get(&wtxn, &key)? {
|
||||
Some(meta) => meta,
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
self.aborted_meta.put(&mut wtxn, &key, &meta)?;
|
||||
self.pending_meta.delete(&mut wtxn, &key)?;
|
||||
self.pending.delete(&mut wtxn, &key)?;
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
Ok(Some(meta))
|
||||
}
|
||||
|
||||
/// Aborts all the pending updates, and not the one being currently processed.
|
||||
/// Returns the update metas and ids that were successfully aborted.
|
||||
pub fn abort_pendings(&self) -> heed::Result<Vec<(u64, M)>>
|
||||
where
|
||||
M: Serialize + for<'a> Deserialize<'a>,
|
||||
{
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let mut aborted_updates = Vec::new();
|
||||
|
||||
// We skip the first pending update as it is currently being processed.
|
||||
for result in self.pending_meta.iter(&wtxn)?.skip(1) {
|
||||
let (key, meta) = result?;
|
||||
let id = key.get();
|
||||
aborted_updates.push((id, meta));
|
||||
}
|
||||
|
||||
for (id, meta) in &aborted_updates {
|
||||
let key = BEU64::new(*id);
|
||||
self.aborted_meta.put(&mut wtxn, &key, &meta)?;
|
||||
self.pending_meta.delete(&mut wtxn, &key)?;
|
||||
self.pending.delete(&mut wtxn, &key)?;
|
||||
}
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
Ok(aborted_updates)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Hash)]
|
||||
pub enum UpdateStatusMeta<M, N> {
|
||||
Pending(M),
|
||||
Processed(N),
|
||||
Aborted(M),
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::thread;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn simple() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let options = EnvOpenOptions::new();
|
||||
let update_store = UpdateStore::open(options, dir, |_id, meta: String, _content: &_| {
|
||||
Ok(meta + " processed")
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let meta = String::from("kiki");
|
||||
let update_id = update_store.register_update(&meta, &[]).unwrap();
|
||||
|
||||
thread::sleep(Duration::from_millis(100));
|
||||
|
||||
let meta = update_store.meta(update_id).unwrap().unwrap();
|
||||
assert_eq!(meta, UpdateStatusMeta::Processed(format!("kiki processed")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn long_running_update() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let options = EnvOpenOptions::new();
|
||||
let update_store = UpdateStore::open(options, dir, |_id, meta: String, _content: &_| {
|
||||
thread::sleep(Duration::from_millis(400));
|
||||
Ok(meta + " processed")
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let before_register = Instant::now();
|
||||
|
||||
let meta = String::from("kiki");
|
||||
let update_id_kiki = update_store.register_update(&meta, &[]).unwrap();
|
||||
assert!(before_register.elapsed() < Duration::from_millis(200));
|
||||
|
||||
let meta = String::from("coco");
|
||||
let update_id_coco = update_store.register_update(&meta, &[]).unwrap();
|
||||
assert!(before_register.elapsed() < Duration::from_millis(200));
|
||||
|
||||
let meta = String::from("cucu");
|
||||
let update_id_cucu = update_store.register_update(&meta, &[]).unwrap();
|
||||
assert!(before_register.elapsed() < Duration::from_millis(200));
|
||||
|
||||
thread::sleep(Duration::from_millis(400 * 3 + 100));
|
||||
|
||||
let meta = update_store.meta(update_id_kiki).unwrap().unwrap();
|
||||
assert_eq!(meta, UpdateStatusMeta::Processed(format!("kiki processed")));
|
||||
|
||||
let meta = update_store.meta(update_id_coco).unwrap().unwrap();
|
||||
assert_eq!(meta, UpdateStatusMeta::Processed(format!("coco processed")));
|
||||
|
||||
let meta = update_store.meta(update_id_cucu).unwrap().unwrap();
|
||||
assert_eq!(meta, UpdateStatusMeta::Processed(format!("cucu processed")));
|
||||
}
|
||||
}
|
@ -1,102 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="stylesheet" href="/bulma.min.css">
|
||||
<link rel="stylesheet" href="/bulma-prefers-dark.min.css">
|
||||
<link rel="stylesheet" href="/style.css">
|
||||
<script type="text/javascript" src="/jquery-3.4.1.min.js"></script>
|
||||
<script type="text/javascript" src="/filesize.min.js"></script>
|
||||
<title>{{ db_name }} | The milli engine</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<section class="hero is-light">
|
||||
<div class="hero-body">
|
||||
<div class="container">
|
||||
|
||||
<div class="columns is-flex is-centered mb-6">
|
||||
<figure class="image is-128x128">
|
||||
<img id="logo-white" src="logo-white.svg" alt="milli logo in white">
|
||||
<img id="logo-black" src="logo-black.svg" alt="milli logo in black">
|
||||
</figure>
|
||||
</div>
|
||||
|
||||
<nav class="level">
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<p class="heading">Database Name</p>
|
||||
<p class="title">{{ db_name }}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<p class="heading">Database Size</p>
|
||||
<p class="title" id="db-size">{{ db_size }}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<p class="heading">Number of Documents</p>
|
||||
<p class="title" id="docs-count">{{ docs_count }}</p>
|
||||
</div>
|
||||
<button id="display_facets" class="button is-success">display facets</button>
|
||||
</div>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="hero container">
|
||||
<div class="notification" style="border-radius: 0 0 4px 4px;">
|
||||
|
||||
<nav class="level">
|
||||
<!-- Left side -->
|
||||
<div class="level-left">
|
||||
<div class="level-item">
|
||||
<div class="field has-addons has-addons-right">
|
||||
<input id="query" class="input" type="text" autofocus placeholder="e.g. George Clooney">
|
||||
<input id="filters" class="input" type="text" placeholder="filters like released >= 1577836800">
|
||||
</div>
|
||||
</div>
|
||||
<div class="level-item"></div>
|
||||
</div>
|
||||
|
||||
<!-- Right side -->
|
||||
<nav class="level-right">
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<p class="heading">Candidates</p>
|
||||
<p id="count" class="title">0</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="level-item has-text-centered">
|
||||
</div>
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<p class="heading">Time Spent</p>
|
||||
<p id="time" class="title">0ms</p>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
</nav>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section id="facets">
|
||||
<!-- facet values -->
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<ol id="results" class="content">
|
||||
<!-- documents matching requests -->
|
||||
</ol>
|
||||
</section>
|
||||
|
||||
</body>
|
||||
|
||||
<script type="text/javascript" src="/script.js"></script>
|
||||
|
||||
</html>
|
@ -1,95 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="stylesheet" href="/bulma.min.css">
|
||||
<link rel="stylesheet" href="/bulma-prefers-dark.min.css">
|
||||
<link rel="stylesheet" href="/style.css">
|
||||
<script type="text/javascript" src="/jquery-3.4.1.min.js"></script>
|
||||
<script type="text/javascript" src="/filesize.min.js"></script>
|
||||
<title>{{ db_name }} | Updates</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<section class="hero is-light">
|
||||
<div class="hero-body">
|
||||
<div class="container">
|
||||
|
||||
<a href="/">
|
||||
<div class="columns is-flex is-centered mb-6">
|
||||
<figure class="image is-128x128">
|
||||
<img id="logo-white" src="logo-white.svg" alt="milli logo in white">
|
||||
<img id="logo-black" src="logo-black.svg" alt="milli logo in black">
|
||||
</figure>
|
||||
</div>
|
||||
</a>
|
||||
|
||||
<nav class="level">
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<p class="heading">Database Name</p>
|
||||
<p class="title">{{ db_name }}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<p class="heading">Database Size</p>
|
||||
<p class="title" id="db-size">{{ db_size }}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<p class="heading">Number of Documents</p>
|
||||
<p class="title" id="docs-count">{{ docs_count }}</p>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<ol id="results" class="content">
|
||||
|
||||
{% for update in updates %}
|
||||
{% match update %}
|
||||
{% when UpdateStatus::Pending with { update_id, meta } %}
|
||||
<li id="update-{{ update_id }}" class="document">
|
||||
<ol>
|
||||
<li class="field">
|
||||
<div class="attribute">update id</div><div class="updateId content">{{ update_id }}</div>
|
||||
<div class="attribute">update status</div><div class="updateStatus content">pending</div>
|
||||
</li>
|
||||
</ol>
|
||||
</li>
|
||||
{% when UpdateStatus::Processed with { update_id, meta } %}
|
||||
<li id="update-{{ update_id }}" class="document">
|
||||
<ol>
|
||||
<li class="field">
|
||||
<div class="attribute">update id</div><div class="updateId content">{{ update_id }}</div>
|
||||
<div class="attribute">update status</div><div class="updateStatus content">{{ meta }}</div>
|
||||
</li>
|
||||
</ol>
|
||||
</li>
|
||||
{% when UpdateStatus::Aborted with { update_id, meta } %}
|
||||
<li id="update-{{ update_id }}" class="document">
|
||||
<ol>
|
||||
<li class="field">
|
||||
<div class="attribute">update id</div><div class="updateId content">{{ update_id }}</div>
|
||||
<div class="attribute">update status</div><div class="updateStatus content">aborted</div>
|
||||
</li>
|
||||
</ol>
|
||||
</li>
|
||||
{% else %}
|
||||
{% endmatch %}
|
||||
{% endfor %}
|
||||
|
||||
</ol>
|
||||
</section>
|
||||
|
||||
</body>
|
||||
|
||||
<script type="text/javascript" src="/updates-script.js"></script>
|
||||
|
||||
</html>
|
@ -1,17 +0,0 @@
|
||||
[package]
|
||||
name = "infos"
|
||||
version = "0.33.4"
|
||||
authors = ["Clément Renault <clement@meilisearch.com>"]
|
||||
edition = "2018"
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.56"
|
||||
byte-unit = { version = "4.0.14", default-features = false, features = ["std"] }
|
||||
csv = "1.1.6"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.29", default-features = false }
|
||||
roaring = "0.9.0"
|
||||
serde_json = "1.0.79"
|
||||
stderrlog = "0.5.1"
|
||||
structopt = { version = "0.3.26", default-features = false }
|
1221
infos/src/main.rs
1221
infos/src/main.rs
File diff suppressed because it is too large
Load Diff
@ -1,26 +0,0 @@
|
||||
# Milli
|
||||
|
||||
## Fuzzing milli
|
||||
|
||||
Currently you can only fuzz the indexation.
|
||||
To execute the fuzzer run:
|
||||
```
|
||||
cargo +nightly fuzz run indexing
|
||||
```
|
||||
|
||||
To execute the fuzzer on multiple thread you can also run:
|
||||
```
|
||||
cargo +nightly fuzz run -j4 indexing
|
||||
```
|
||||
|
||||
Since the fuzzer is going to create a lot of temporary file to let milli index its documents
|
||||
I would also recommand to execute it on a ramdisk.
|
||||
Here is how to setup a ramdisk on linux:
|
||||
```
|
||||
sudo mount -t tmpfs none path/to/your/ramdisk
|
||||
```
|
||||
And then set the [TMPDIR](https://doc.rust-lang.org/std/env/fn.temp_dir.html) environment variable
|
||||
to make the fuzzer create its file in it:
|
||||
```
|
||||
export TMPDIR=path/to/your/ramdisk
|
||||
```
|
5
milli/fuzz/.gitignore
vendored
5
milli/fuzz/.gitignore
vendored
@ -1,5 +0,0 @@
|
||||
Cargo.lock
|
||||
target/
|
||||
|
||||
/corpus/
|
||||
/artifacts/
|
@ -1,34 +0,0 @@
|
||||
[package]
|
||||
name = "milli-fuzz"
|
||||
version = "0.0.0"
|
||||
authors = ["Automatically generated"]
|
||||
publish = false
|
||||
edition = "2018"
|
||||
|
||||
[package.metadata]
|
||||
cargo-fuzz = true
|
||||
|
||||
[dependencies]
|
||||
arbitrary = "1.0"
|
||||
libfuzzer-sys = "0.4"
|
||||
serde_json = { version = "1.0.62", features = ["preserve_order"] }
|
||||
anyhow = "1.0"
|
||||
tempfile = "3.3"
|
||||
arbitrary-json = "0.1.0"
|
||||
mimalloc = { version = "0.1.29", default-features = false }
|
||||
|
||||
[dependencies.milli]
|
||||
path = ".."
|
||||
|
||||
# Prevent this from interfering with workspaces
|
||||
[workspace]
|
||||
members = ["."]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
|
||||
[[bin]]
|
||||
name = "indexing"
|
||||
path = "fuzz_targets/indexing.rs"
|
||||
test = false
|
||||
doc = false
|
@ -1,114 +0,0 @@
|
||||
#![no_main]
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::io::{BufWriter, Cursor, Read, Seek, Write};
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use arbitrary_json::ArbitraryValue;
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use milli::heed::EnvOpenOptions;
|
||||
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||
use milli::{Index, Object};
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
/// reads json from input and write an obkv batch to writer.
|
||||
pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
|
||||
let writer = BufWriter::new(writer);
|
||||
let mut builder = DocumentsBatchBuilder::new(writer);
|
||||
|
||||
let values: Vec<Object> = serde_json::from_reader(input)?;
|
||||
if builder.documents_count() == 0 {
|
||||
bail!("Empty payload");
|
||||
}
|
||||
|
||||
for object in values {
|
||||
builder.append_json_object(&object)?;
|
||||
}
|
||||
|
||||
let count = builder.documents_count();
|
||||
let vector = builder.into_inner()?;
|
||||
|
||||
Ok(count as usize)
|
||||
}
|
||||
|
||||
fn index_documents(
|
||||
index: &mut milli::Index,
|
||||
documents: DocumentsBatchReader<Cursor<Vec<u8>>>,
|
||||
) -> Result<()> {
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn()?;
|
||||
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())?;
|
||||
builder.add_documents(documents)?;
|
||||
builder.execute().unwrap();
|
||||
|
||||
wtxn.commit()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_index() -> Result<milli::Index> {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024 * 1024); // 10 GB
|
||||
options.max_readers(1);
|
||||
let index = Index::new(options, dir.path())?;
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
let displayed_fields =
|
||||
["id", "title", "album", "artist", "genre", "country", "released", "duration"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields = ["title", "album", "artist"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let faceted_fields: HashSet<String> =
|
||||
["released-timestamp", "duration-float", "genre", "country", "artist"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_filterable_fields(faceted_fields.clone());
|
||||
builder.set_sortable_fields(faceted_fields);
|
||||
|
||||
builder.set_distinct_field("same".to_string());
|
||||
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
fuzz_target!(|batches: Vec<Vec<ArbitraryValue>>| {
|
||||
if let Ok(mut index) = create_index() {
|
||||
for batch in batches {
|
||||
let documents: Vec<Value> =
|
||||
batch.into_iter().map(|value| serde_json::Value::from(value)).collect();
|
||||
let json = Value::Array(documents);
|
||||
let json = serde_json::to_string(&json).unwrap();
|
||||
|
||||
let mut documents = Cursor::new(Vec::new());
|
||||
|
||||
// We ignore all malformed documents
|
||||
if let Ok(_) = read_json(json.as_bytes(), &mut documents) {
|
||||
documents.rewind().unwrap();
|
||||
let documents = DocumentsBatchReader::from_reader(documents).unwrap();
|
||||
// A lot of errors can come out of milli and we don't know which ones are normal or not
|
||||
// so we are only going to look for the unexpected panics.
|
||||
let _ = index_documents(&mut index, documents);
|
||||
}
|
||||
}
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
}
|
||||
});
|
Loading…
x
Reference in New Issue
Block a user