mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-27 05:37:31 +01:00
Merge #636
636: Remove unused `infos`, `http-ui`, and `milli/fuzz`, crates r=ManyTheFish a=loiclec We haven't used the `infos/`, `http-ui/` and `milli/fuzz/` crates in a long time. They are not properly maintained and probably do not work correctly anymore. This PR removes these crates entirely from the workspace to reduce the amount of code we need to maintain. Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
This commit is contained in:
commit
d94339a858
@ -45,19 +45,6 @@ We recommend using the `--release` flag to test the full performance.
|
||||
cargo test
|
||||
```
|
||||
|
||||
### Querying the engine via the web interface
|
||||
|
||||
To help you develop your feature you might need to use a web interface! You can query the engine by going to [the HTML page itself](http://127.0.0.1:9700).
|
||||
|
||||
### Compile and run the HTTP debug server
|
||||
|
||||
You can specify the number of threads to use to index documents and many other settings too.
|
||||
|
||||
```bash
|
||||
cd http-ui
|
||||
cargo run --release -- --db my-database.mdb -vvv --indexing-jobs 8
|
||||
```
|
||||
|
||||
### Index your documents
|
||||
|
||||
It can index a massive amount of documents in not much time, I already achieved to index:
|
||||
|
@ -1,6 +1,6 @@
|
||||
[workspace]
|
||||
resolver = "2"
|
||||
members = ["milli", "filter-parser", "flatten-serde-json", "json-depth-checker", "http-ui", "benchmarks", "infos", "helpers", "cli"]
|
||||
members = ["milli", "filter-parser", "flatten-serde-json", "json-depth-checker", "benchmarks", "helpers", "cli"]
|
||||
default-members = ["milli"]
|
||||
|
||||
[profile.dev]
|
||||
|
@ -1,5 +1,5 @@
|
||||
<p align="center">
|
||||
<img alt="the milli logo" src="http-ui/public/logo-black.svg">
|
||||
<img alt="the milli logo" src="logo-black.svg">
|
||||
</p>
|
||||
|
||||
<p align="center">a concurrent indexer combined with fast and relevant search algorithms</p>
|
||||
@ -19,8 +19,6 @@ This repository contains crates to quickly debug the engine:
|
||||
- The `filter-parser` crate contains the parser for the Meilisearch filter syntax.
|
||||
- The `flatten-serde-json` crate contains the library that flattens serde-json `Value` objects like Elasticsearch does.
|
||||
- The `helpers` crate is only used to do operations on the database.
|
||||
- The `http-ui` crate is a simple HTTP dashboard to test the features like for real!
|
||||
- The `infos` crate is used to dump the internal data-structure and ensure correctness.
|
||||
- The `json-depth-checker` crate is used to indicate if a JSON must be flattened.
|
||||
|
||||
## How to use it?
|
||||
|
@ -1,47 +0,0 @@
|
||||
[package]
|
||||
name = "http-ui"
|
||||
description = "The HTTP user interface of the milli search engine"
|
||||
version = "0.33.4"
|
||||
authors = ["Clément Renault <clement@meilisearch.com>"]
|
||||
edition = "2018"
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.56"
|
||||
byte-unit = { version = "4.0.14", default-features = false, features = ["std"] }
|
||||
crossbeam-channel = "0.5.2"
|
||||
memmap2 = "0.5.3"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.29", default-features = false }
|
||||
once_cell = "1.10.0"
|
||||
rayon = "1.5.1"
|
||||
structopt = { version = "0.3.26", default-features = false, features = ["wrap_help"] }
|
||||
tempfile = "3.3.0"
|
||||
|
||||
# http server
|
||||
askama = "0.11.1"
|
||||
askama_warp = "0.12.0"
|
||||
bytes = "1.1.0"
|
||||
either = "1.6.1"
|
||||
flate2 = "1.0.22"
|
||||
futures = "0.3.21"
|
||||
serde = { version = "1.0.136", features = ["derive"] }
|
||||
serde_json = { version = "1.0.79", features = ["preserve_order"] }
|
||||
tokio = { version = "1.17.0", features = ["full"] }
|
||||
tokio-stream = { version = "0.1.8", default-features = false, features = ["sync"] }
|
||||
warp = "0.3.2"
|
||||
|
||||
# logging
|
||||
fst = "0.4.7"
|
||||
log = "0.4.17"
|
||||
stderrlog = "0.5.1"
|
||||
|
||||
# Temporary fix for bitvec, remove once fixed. (https://github.com/bitvecto-rs/bitvec/issues/105)
|
||||
bimap = "0.6.2"
|
||||
csv = "1.1.6"
|
||||
funty = "2.0.0"
|
||||
|
||||
[dev-dependencies]
|
||||
maplit = "1.0.2"
|
||||
serde_test = "1.0.136"
|
||||
|
1
http-ui/public/bulma-prefers-dark.min.css
vendored
1
http-ui/public/bulma-prefers-dark.min.css
vendored
File diff suppressed because one or more lines are too long
1
http-ui/public/bulma.min.css
vendored
1
http-ui/public/bulma.min.css
vendored
File diff suppressed because one or more lines are too long
5
http-ui/public/filesize.min.js
vendored
5
http-ui/public/filesize.min.js
vendored
@ -1,5 +0,0 @@
|
||||
/*
|
||||
2020 Jason Mulligan <jason.mulligan@avoidwork.com>
|
||||
@version 6.1.0
|
||||
*/
|
||||
"use strict";!function(e){var x=/^(b|B)$/,M={iec:{bits:["b","Kib","Mib","Gib","Tib","Pib","Eib","Zib","Yib"],bytes:["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB","YiB"]},jedec:{bits:["b","Kb","Mb","Gb","Tb","Pb","Eb","Zb","Yb"],bytes:["B","KB","MB","GB","TB","PB","EB","ZB","YB"]}},w={iec:["","kibi","mebi","gibi","tebi","pebi","exbi","zebi","yobi"],jedec:["","kilo","mega","giga","tera","peta","exa","zetta","yotta"]};function t(e){var i,t,o,n,b,r,a,l,s,d,u,c,f,p,B,y=1<arguments.length&&void 0!==arguments[1]?arguments[1]:{},g=[],v=0,m=void 0,h=void 0;if(isNaN(e))throw new TypeError("Invalid number");return t=!0===y.bits,u=!0===y.unix,i=y.base||2,d=void 0!==y.round?y.round:u?1:2,r=void 0!==y.locale?y.locale:"",a=y.localeOptions||{},c=void 0!==y.separator?y.separator:"",f=void 0!==y.spacer?y.spacer:u?"":" ",B=y.symbols||{},p=2===i&&y.standard||"jedec",s=y.output||"string",n=!0===y.fullform,b=y.fullforms instanceof Array?y.fullforms:[],m=void 0!==y.exponent?y.exponent:-1,o=2<i?1e3:1024,(l=(h=Number(e))<0)&&(h=-h),(-1===m||isNaN(m))&&(m=Math.floor(Math.log(h)/Math.log(o)))<0&&(m=0),8<m&&(m=8),"exponent"===s?m:(0===h?(g[0]=0,g[1]=u?"":M[p][t?"bits":"bytes"][m]):(v=h/(2===i?Math.pow(2,10*m):Math.pow(1e3,m)),t&&o<=(v*=8)&&m<8&&(v/=o,m++),g[0]=Number(v.toFixed(0<m?d:0)),g[0]===o&&m<8&&void 0===y.exponent&&(g[0]=1,m++),g[1]=10===i&&1===m?t?"kb":"kB":M[p][t?"bits":"bytes"][m],u&&(g[1]="jedec"===p?g[1].charAt(0):0<m?g[1].replace(/B$/,""):g[1],x.test(g[1])&&(g[0]=Math.floor(g[0]),g[1]=""))),l&&(g[0]=-g[0]),g[1]=B[g[1]]||g[1],!0===r?g[0]=g[0].toLocaleString():0<r.length?g[0]=g[0].toLocaleString(r,a):0<c.length&&(g[0]=g[0].toString().replace(".",c)),"array"===s?g:(n&&(g[1]=b[m]?b[m]:w[p][m]+(t?"bit":"byte")+(1===g[0]?"":"s")),"object"===s?{value:g[0],symbol:g[1],exponent:m}:g.join(f)))}t.partial=function(i){return function(e){return t(e,i)}},"undefined"!=typeof exports?module.exports=t:"function"==typeof define&&void 0!==define.amd?define(function(){return t}):e.filesize=t}("undefined"!=typeof window?window:global);
|
2
http-ui/public/jquery-3.4.1.min.js
vendored
2
http-ui/public/jquery-3.4.1.min.js
vendored
File diff suppressed because one or more lines are too long
@ -1,6 +0,0 @@
|
||||
<svg width="277" height="236" viewBox="0 0 277 236" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M213.085 190L242.907 86H276.196L246.375 190H213.085Z" fill="#494949"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M0 190L29.8215 86H63.1111L33.2896 190H0Z" fill="#494949"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M124.986 0L57.5772 235.083L60.7752 236H90.6038L158.276 0H124.986Z" fill="#494949"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M195.273 0L127.601 236H160.891L228.563 0H195.273Z" fill="#494949"/>
|
||||
</svg>
|
Before Width: | Height: | Size: 585 B |
@ -1,6 +0,0 @@
|
||||
<svg width="277" height="236" viewBox="0 0 277 236" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M213.508 190L243.33 86H276.619L246.798 190H213.508Z" fill="#B5B5B5"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M0.422791 190L30.2443 86H63.5339L33.7124 190H0.422791Z" fill="#B5B5B5"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M125.409 0L58 235.083L61.198 236H91.0266L158.699 0H125.409Z" fill="#B5B5B5"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M195.696 0L128.024 236H161.313L228.985 0H195.696Z" fill="#B5B5B5"/>
|
||||
</svg>
|
Before Width: | Height: | Size: 592 B |
@ -1,154 +0,0 @@
|
||||
var request = null;
|
||||
var timeoutID = null;
|
||||
var display_facets = false;
|
||||
|
||||
$('#query, #filters').on('input', function () {
|
||||
var query = $('#query').val();
|
||||
var filters = $('#filters').val();
|
||||
var timeoutMs = 100;
|
||||
|
||||
if (timeoutID !== null) {
|
||||
window.clearTimeout(timeoutID);
|
||||
}
|
||||
|
||||
timeoutID = window.setTimeout(function () {
|
||||
request = $.ajax({
|
||||
type: "POST",
|
||||
url: "query",
|
||||
contentType: 'application/json',
|
||||
data: JSON.stringify({
|
||||
'query': query,
|
||||
'filters': filters,
|
||||
"facetDistribution": display_facets,
|
||||
}),
|
||||
contentType: 'application/json',
|
||||
success: function (data, textStatus, request) {
|
||||
results.innerHTML = '';
|
||||
facets.innerHTML = '';
|
||||
|
||||
let timeSpent = request.getResponseHeader('Time-Ms');
|
||||
let numberOfDocuments = data.documents.length;
|
||||
count.innerHTML = data.numberOfCandidates.toLocaleString();
|
||||
time.innerHTML = `${timeSpent}ms`;
|
||||
time.classList.remove('fade-in-out');
|
||||
|
||||
for (facet_name in data.facets) {
|
||||
for (value in data.facets[facet_name]) {
|
||||
const elem = document.createElement('span');
|
||||
const count = data.facets[facet_name][value];
|
||||
elem.classList.add("tag");
|
||||
elem.setAttribute('data-name', facet_name);
|
||||
elem.setAttribute('data-value', value);
|
||||
elem.innerHTML = `${facet_name}:${value} (${count})`;
|
||||
facets.appendChild(elem);
|
||||
}
|
||||
}
|
||||
|
||||
for (element of data.documents) {
|
||||
const elem = document.createElement('li');
|
||||
elem.classList.add("document");
|
||||
|
||||
const ol = document.createElement('ol');
|
||||
|
||||
for (const prop in element) {
|
||||
const field = document.createElement('li');
|
||||
field.classList.add("field");
|
||||
|
||||
const attribute = document.createElement('div');
|
||||
attribute.classList.add("attribute");
|
||||
attribute.innerHTML = prop;
|
||||
|
||||
const content = document.createElement('div');
|
||||
content.classList.add("content");
|
||||
|
||||
// Stringify Objects and Arrays to avoid [Object object]
|
||||
if (typeof element[prop] === 'object' && element[prop] !== null) {
|
||||
content.innerHTML = JSON.stringify(element[prop]);
|
||||
} else {
|
||||
content.innerHTML = element[prop];
|
||||
}
|
||||
|
||||
field.appendChild(attribute);
|
||||
field.appendChild(content);
|
||||
|
||||
ol.appendChild(field);
|
||||
}
|
||||
|
||||
elem.appendChild(ol);
|
||||
results.appendChild(elem);
|
||||
}
|
||||
|
||||
// When we click on a tag we append the facet value
|
||||
// at the end of the facet query.
|
||||
$('#facets .tag').on('click', function () {
|
||||
let name = $(this).attr("data-name");
|
||||
let value = $(this).attr("data-value");
|
||||
|
||||
let facet_query = $('#filters').val().trim();
|
||||
if (facet_query === "") {
|
||||
$('#filters').val(`${name} = "${value}"`).trigger('input');
|
||||
} else {
|
||||
$('#filters').val(`${facet_query} AND ${name} = "${value}"`).trigger('input');
|
||||
}
|
||||
});
|
||||
},
|
||||
beforeSend: function () {
|
||||
if (request !== null) {
|
||||
request.abort();
|
||||
time.classList.add('fade-in-out');
|
||||
}
|
||||
},
|
||||
});
|
||||
}, timeoutMs);
|
||||
});
|
||||
|
||||
function diffArray(arr1, arr2) {
|
||||
return arr1.concat(arr2).filter(function (val) {
|
||||
if (!(arr1.includes(val) && arr2.includes(val)))
|
||||
return val;
|
||||
});
|
||||
}
|
||||
|
||||
function selectedFacetsToArray(facets_obj) {
|
||||
var array = [];
|
||||
for (const facet_name in facets_obj) {
|
||||
var subarray = [];
|
||||
for (const facet_value of facets_obj[facet_name]) {
|
||||
subarray.push(`${facet_name}:${facet_value}`);
|
||||
}
|
||||
array.push(subarray);
|
||||
}
|
||||
return array;
|
||||
}
|
||||
|
||||
$('#display_facets').click(function() {
|
||||
if (display_facets) {
|
||||
display_facets = false;
|
||||
$('#display_facets').html("Display facets")
|
||||
$('#display_facets').removeClass("is-danger");
|
||||
$('#display_facets').addClass("is-success");
|
||||
$('#facets').hide();
|
||||
} else {
|
||||
display_facets = true;
|
||||
$('#display_facets').html("Hide facets")
|
||||
$('#display_facets').addClass("is-danger");
|
||||
$('#display_facets').removeClass("is-success");
|
||||
$('#facets').show();
|
||||
}
|
||||
});
|
||||
|
||||
// Make the number of document a little bit prettier
|
||||
$('#docs-count').text(function(index, text) {
|
||||
return parseInt(text).toLocaleString()
|
||||
});
|
||||
|
||||
// Make the database a little bit easier to read
|
||||
$('#db-size').text(function(index, text) {
|
||||
return filesize(parseInt(text))
|
||||
});
|
||||
|
||||
// We trigger the input when we load the script.
|
||||
$(window).on('load', function () {
|
||||
// We execute a placeholder search when the input is empty.
|
||||
$('#query').trigger('input');
|
||||
});
|
@ -1,144 +0,0 @@
|
||||
#results {
|
||||
max-width: 900px;
|
||||
margin: 20px auto 0 auto;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
#facets .tag {
|
||||
margin-right: 1em;
|
||||
margin-bottom: 1em;
|
||||
}
|
||||
|
||||
#facets {
|
||||
display: none;
|
||||
max-width: 900px;
|
||||
margin: 20px auto 0 auto;
|
||||
padding: 0;
|
||||
max-height: 16em;
|
||||
overflow: scroll;
|
||||
}
|
||||
|
||||
#display_facets {
|
||||
margin: 20px auto 0 auto;
|
||||
padding: 5px;
|
||||
max-height: 16em;
|
||||
overflow: scroll;
|
||||
}
|
||||
|
||||
#facets .tag:hover {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
#logo-white {
|
||||
display: none;
|
||||
}
|
||||
|
||||
#logo-black {
|
||||
display: inherit;
|
||||
}
|
||||
|
||||
.notification {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.document {
|
||||
padding: 20px 20px;
|
||||
background-color: #f5f5f5;
|
||||
border-radius: 4px;
|
||||
margin-bottom: 20px;
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.document ol {
|
||||
flex: 0 0 75%;
|
||||
max-width: 75%;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.document .image {
|
||||
max-width: 25%;
|
||||
flex: 0 0 25%;
|
||||
padding-left: 30px;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
.document .image img {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.field {
|
||||
list-style-type: none;
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.field:not(:last-child) {
|
||||
margin-bottom: 7px;
|
||||
}
|
||||
|
||||
.attribute {
|
||||
flex: 0 0 35%;
|
||||
max-width: 35%;
|
||||
text-align: right;
|
||||
padding-right: 10px;
|
||||
box-sizing: border-box;
|
||||
text-transform: uppercase;
|
||||
opacity: 0.7;
|
||||
}
|
||||
|
||||
.content {
|
||||
max-width: 65%;
|
||||
flex: 0 0 65%;
|
||||
box-sizing: border-box;
|
||||
padding-left: 10px;
|
||||
color: rgba(0,0,0,.9);
|
||||
}
|
||||
|
||||
.content mark {
|
||||
background-color: hsl(204, 86%, 88%);
|
||||
color: hsl(204, 86%, 25%);
|
||||
}
|
||||
|
||||
@keyframes fadeInOut {
|
||||
0% { opacity: 1; }
|
||||
30% { opacity: 0.3; }
|
||||
100% { opacity: 1; }
|
||||
}
|
||||
|
||||
.fade-in-out {
|
||||
animation: fadeInOut ease 1s infinite;
|
||||
}
|
||||
|
||||
@media (prefers-color-scheme:dark) {
|
||||
#logo-white {
|
||||
display: inherit;
|
||||
}
|
||||
|
||||
#logo-black {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.hero.is-light {
|
||||
background-color: #242424;
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
.hero.is-light .title {
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
.document {
|
||||
background-color: #242424;
|
||||
}
|
||||
|
||||
.content {
|
||||
color: #dbdbdb;
|
||||
}
|
||||
|
||||
.content mark {
|
||||
background-color: hsl(0, 0%, 35%);
|
||||
color: hsl(0,0%,90.2%);
|
||||
}
|
||||
}
|
@ -1,102 +0,0 @@
|
||||
$(window).on('load', function () {
|
||||
let wsProtcol = "ws";
|
||||
if (window.location.protocol === 'https') {
|
||||
wsProtcol = 'wss';
|
||||
}
|
||||
|
||||
let url = wsProtcol + '://' + window.location.hostname + ':' + window.location.port + '/updates/ws';
|
||||
var socket = new WebSocket(url);
|
||||
|
||||
socket.onmessage = function (event) {
|
||||
let status = JSON.parse(event.data);
|
||||
|
||||
if (status.type == 'Pending') {
|
||||
const elem = document.createElement('li');
|
||||
elem.classList.add("document");
|
||||
elem.setAttribute("id", 'update-' + status.update_id);
|
||||
|
||||
const ol = document.createElement('ol');
|
||||
const field = document.createElement('li');
|
||||
field.classList.add("field");
|
||||
|
||||
const attributeUpdateId = document.createElement('div');
|
||||
attributeUpdateId.classList.add("attribute");
|
||||
attributeUpdateId.innerHTML = "update id";
|
||||
|
||||
const contentUpdateId = document.createElement('div');
|
||||
contentUpdateId.classList.add("updateId");
|
||||
contentUpdateId.classList.add("content");
|
||||
contentUpdateId.innerHTML = status.update_id;
|
||||
|
||||
field.appendChild(attributeUpdateId);
|
||||
field.appendChild(contentUpdateId);
|
||||
|
||||
const attributeUpdateStatus = document.createElement('div');
|
||||
attributeUpdateStatus.classList.add("attribute");
|
||||
attributeUpdateStatus.innerHTML = "update status";
|
||||
|
||||
const contentUpdateStatus = document.createElement('div');
|
||||
contentUpdateStatus.classList.add("updateStatus");
|
||||
contentUpdateStatus.classList.add("content");
|
||||
contentUpdateStatus.innerHTML = 'pending';
|
||||
|
||||
field.appendChild(attributeUpdateStatus);
|
||||
field.appendChild(contentUpdateStatus);
|
||||
|
||||
ol.appendChild(field);
|
||||
elem.appendChild(ol);
|
||||
|
||||
prependChild(results, elem);
|
||||
}
|
||||
|
||||
if (status.type == "Progressing") {
|
||||
const id = 'update-' + status.update_id;
|
||||
const content = $(`#${id} .updateStatus.content`);
|
||||
|
||||
let html;
|
||||
|
||||
let { type, step, total_steps, current, total } = status.meta;
|
||||
|
||||
if (type === 'DocumentsAddition') {
|
||||
// If the total is null or undefined then the progress results is infinity.
|
||||
let progress = Math.round(current / total * 100);
|
||||
// We must divide the progress by the total number of indexing steps.
|
||||
progress = progress / total_steps;
|
||||
// And mark the previous steps as processed.
|
||||
progress = progress + (step * 100 / total_steps);
|
||||
// Generate the appropriate html bulma progress bar.
|
||||
html = `<progress class="progress" title="${progress}%" value="${progress}" max="100"></progress>`;
|
||||
} else {
|
||||
html = `<progress class="progress" max="100"></progress>`;
|
||||
}
|
||||
|
||||
content.html(html);
|
||||
}
|
||||
|
||||
if (status.type == "Processed") {
|
||||
const id = 'update-' + status.update_id;
|
||||
const content = $(`#${id} .updateStatus.content`);
|
||||
content.html('processed ' + JSON.stringify(status.meta));
|
||||
}
|
||||
|
||||
if (status.type == "Aborted") {
|
||||
const id = 'update-' + status.update_id;
|
||||
const content = $(`#${id} .updateStatus.content`);
|
||||
content.html('aborted ' + JSON.stringify(status.meta));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
function prependChild(parent, newFirstChild) {
|
||||
parent.insertBefore(newFirstChild, parent.firstChild)
|
||||
}
|
||||
|
||||
// Make the number of document a little bit prettier
|
||||
$('#docs-count').text(function(index, text) {
|
||||
return parseInt(text).toLocaleString()
|
||||
});
|
||||
|
||||
// Make the database a little bit easier to read
|
||||
$('#db-size').text(function(index, text) {
|
||||
return filesize(parseInt(text))
|
||||
});
|
1176
http-ui/src/main.rs
1176
http-ui/src/main.rs
File diff suppressed because it is too large
Load Diff
@ -1,362 +0,0 @@
|
||||
#![allow(unused)]
|
||||
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crossbeam_channel::Sender;
|
||||
use heed::types::{ByteSlice, DecodeIgnore, OwnedType, SerdeJson};
|
||||
use heed::{Database, Env, EnvOpenOptions};
|
||||
use milli::heed;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct UpdateStore<M, N> {
|
||||
env: Env,
|
||||
pending_meta: Database<OwnedType<BEU64>, SerdeJson<M>>,
|
||||
pending: Database<OwnedType<BEU64>, ByteSlice>,
|
||||
processed_meta: Database<OwnedType<BEU64>, SerdeJson<N>>,
|
||||
aborted_meta: Database<OwnedType<BEU64>, SerdeJson<M>>,
|
||||
notification_sender: Sender<()>,
|
||||
}
|
||||
|
||||
pub trait UpdateHandler<M, N> {
|
||||
fn handle_update(&mut self, update_id: u64, meta: M, content: &[u8]) -> heed::Result<N>;
|
||||
}
|
||||
|
||||
impl<M, N, F> UpdateHandler<M, N> for F
|
||||
where
|
||||
F: FnMut(u64, M, &[u8]) -> heed::Result<N> + Send + 'static,
|
||||
{
|
||||
fn handle_update(&mut self, update_id: u64, meta: M, content: &[u8]) -> heed::Result<N> {
|
||||
self(update_id, meta, content)
|
||||
}
|
||||
}
|
||||
|
||||
impl<M: 'static, N: 'static> UpdateStore<M, N> {
|
||||
pub fn open<P, U>(
|
||||
mut options: EnvOpenOptions,
|
||||
path: P,
|
||||
mut update_handler: U,
|
||||
) -> heed::Result<Arc<UpdateStore<M, N>>>
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
U: UpdateHandler<M, N> + Send + 'static,
|
||||
M: for<'a> Deserialize<'a>,
|
||||
N: Serialize,
|
||||
{
|
||||
options.max_dbs(4);
|
||||
let env = options.open(path)?;
|
||||
let pending_meta = env.create_database(Some("pending-meta"))?;
|
||||
let pending = env.create_database(Some("pending"))?;
|
||||
let processed_meta = env.create_database(Some("processed-meta"))?;
|
||||
let aborted_meta = env.create_database(Some("aborted-meta"))?;
|
||||
|
||||
let (notification_sender, notification_receiver) = crossbeam_channel::bounded(1);
|
||||
// Send a first notification to trigger the process.
|
||||
let _ = notification_sender.send(());
|
||||
|
||||
let update_store = Arc::new(UpdateStore {
|
||||
env,
|
||||
pending,
|
||||
pending_meta,
|
||||
processed_meta,
|
||||
aborted_meta,
|
||||
notification_sender,
|
||||
});
|
||||
|
||||
let update_store_cloned = update_store.clone();
|
||||
std::thread::spawn(move || {
|
||||
// Block and wait for something to process.
|
||||
for () in notification_receiver {
|
||||
loop {
|
||||
match update_store_cloned.process_pending_update(&mut update_handler) {
|
||||
Ok(Some(_)) => (),
|
||||
Ok(None) => break,
|
||||
Err(e) => eprintln!("error while processing update: {}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(update_store)
|
||||
}
|
||||
|
||||
/// Returns the new biggest id to use to store the new update.
|
||||
fn new_update_id(&self, txn: &heed::RoTxn) -> heed::Result<u64> {
|
||||
let last_pending =
|
||||
self.pending_meta.remap_data_type::<DecodeIgnore>().last(txn)?.map(|(k, _)| k.get());
|
||||
|
||||
let last_processed =
|
||||
self.processed_meta.remap_data_type::<DecodeIgnore>().last(txn)?.map(|(k, _)| k.get());
|
||||
|
||||
let last_aborted =
|
||||
self.aborted_meta.remap_data_type::<DecodeIgnore>().last(txn)?.map(|(k, _)| k.get());
|
||||
|
||||
let last_update_id =
|
||||
[last_pending, last_processed, last_aborted].iter().copied().flatten().max();
|
||||
|
||||
match last_update_id {
|
||||
Some(last_id) => Ok(last_id + 1),
|
||||
None => Ok(0),
|
||||
}
|
||||
}
|
||||
|
||||
/// Registers the update content in the pending store and the meta
|
||||
/// into the pending-meta store. Returns the new unique update id.
|
||||
pub fn register_update(&self, meta: &M, content: &[u8]) -> heed::Result<u64>
|
||||
where
|
||||
M: Serialize,
|
||||
{
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
|
||||
// We ask the update store to give us a new update id, this is safe,
|
||||
// no other update can have the same id because we use a write txn before
|
||||
// asking for the id and registering it so other update registering
|
||||
// will be forced to wait for a new write txn.
|
||||
let update_id = self.new_update_id(&wtxn)?;
|
||||
let update_key = BEU64::new(update_id);
|
||||
|
||||
self.pending_meta.put(&mut wtxn, &update_key, meta)?;
|
||||
self.pending.put(&mut wtxn, &update_key, content)?;
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
if let Err(e) = self.notification_sender.try_send(()) {
|
||||
assert!(!e.is_disconnected(), "update notification channel is disconnected");
|
||||
}
|
||||
|
||||
Ok(update_id)
|
||||
}
|
||||
|
||||
/// Executes the user provided function on the next pending update (the one with the lowest id).
|
||||
/// This is asynchronous as it let the user process the update with a read-only txn and
|
||||
/// only writing the result meta to the processed-meta store *after* it has been processed.
|
||||
fn process_pending_update<U>(&self, handler: &mut U) -> heed::Result<Option<(u64, N)>>
|
||||
where
|
||||
U: UpdateHandler<M, N>,
|
||||
M: for<'a> Deserialize<'a>,
|
||||
N: Serialize,
|
||||
{
|
||||
// Create a read transaction to be able to retrieve the pending update in order.
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let first_meta = self.pending_meta.first(&rtxn)?;
|
||||
|
||||
// If there is a pending update we process and only keep
|
||||
// a reader while processing it, not a writer.
|
||||
match first_meta {
|
||||
Some((first_id, first_meta)) => {
|
||||
let first_content =
|
||||
self.pending.get(&rtxn, &first_id)?.expect("associated update content");
|
||||
|
||||
// Process the pending update using the provided user function.
|
||||
let new_meta = handler.handle_update(first_id.get(), first_meta, first_content)?;
|
||||
drop(rtxn);
|
||||
|
||||
// Once the pending update have been successfully processed
|
||||
// we must remove the content from the pending stores and
|
||||
// write the *new* meta to the processed-meta store and commit.
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.pending_meta.delete(&mut wtxn, &first_id)?;
|
||||
self.pending.delete(&mut wtxn, &first_id)?;
|
||||
self.processed_meta.put(&mut wtxn, &first_id, &new_meta)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
Ok(Some((first_id.get(), new_meta)))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// The id and metadata of the update that is currently being processed,
|
||||
/// `None` if no update is being processed.
|
||||
pub fn processing_update(&self) -> heed::Result<Option<(u64, M)>>
|
||||
where
|
||||
M: for<'a> Deserialize<'a>,
|
||||
{
|
||||
let rtxn = self.env.read_txn()?;
|
||||
match self.pending_meta.first(&rtxn)? {
|
||||
Some((key, meta)) => Ok(Some((key.get(), meta))),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the user defined function with the meta-store iterators, the first
|
||||
/// iterator is the *processed* meta one, the second the *aborted* meta one
|
||||
/// and, the last is the *pending* meta one.
|
||||
pub fn iter_metas<F, T>(&self, mut f: F) -> heed::Result<T>
|
||||
where
|
||||
M: for<'a> Deserialize<'a>,
|
||||
N: for<'a> Deserialize<'a>,
|
||||
F: for<'a> FnMut(
|
||||
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<N>>,
|
||||
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<M>>,
|
||||
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<M>>,
|
||||
) -> heed::Result<T>,
|
||||
{
|
||||
let rtxn = self.env.read_txn()?;
|
||||
|
||||
// We get the pending, processed and aborted meta iterators.
|
||||
let processed_iter = self.processed_meta.iter(&rtxn)?;
|
||||
let aborted_iter = self.aborted_meta.iter(&rtxn)?;
|
||||
let pending_iter = self.pending_meta.iter(&rtxn)?;
|
||||
|
||||
// We execute the user defined function with both iterators.
|
||||
(f)(processed_iter, aborted_iter, pending_iter)
|
||||
}
|
||||
|
||||
/// Returns the update associated meta or `None` if the update doesn't exist.
|
||||
pub fn meta(&self, update_id: u64) -> heed::Result<Option<UpdateStatusMeta<M, N>>>
|
||||
where
|
||||
M: for<'a> Deserialize<'a>,
|
||||
N: for<'a> Deserialize<'a>,
|
||||
{
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let key = BEU64::new(update_id);
|
||||
|
||||
if let Some(meta) = self.pending_meta.get(&rtxn, &key)? {
|
||||
return Ok(Some(UpdateStatusMeta::Pending(meta)));
|
||||
}
|
||||
|
||||
if let Some(meta) = self.processed_meta.get(&rtxn, &key)? {
|
||||
return Ok(Some(UpdateStatusMeta::Processed(meta)));
|
||||
}
|
||||
|
||||
if let Some(meta) = self.aborted_meta.get(&rtxn, &key)? {
|
||||
return Ok(Some(UpdateStatusMeta::Aborted(meta)));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Aborts an update, an aborted update content is deleted and
|
||||
/// the meta of it is moved into the aborted updates database.
|
||||
///
|
||||
/// Trying to abort an update that is currently being processed, an update
|
||||
/// that as already been processed or which doesn't actually exist, will
|
||||
/// return `None`.
|
||||
pub fn abort_update(&self, update_id: u64) -> heed::Result<Option<M>>
|
||||
where
|
||||
M: Serialize + for<'a> Deserialize<'a>,
|
||||
{
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let key = BEU64::new(update_id);
|
||||
|
||||
// We cannot abort an update that is currently being processed.
|
||||
if self.pending_meta.first(&wtxn)?.map(|(key, _)| key.get()) == Some(update_id) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let meta = match self.pending_meta.get(&wtxn, &key)? {
|
||||
Some(meta) => meta,
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
self.aborted_meta.put(&mut wtxn, &key, &meta)?;
|
||||
self.pending_meta.delete(&mut wtxn, &key)?;
|
||||
self.pending.delete(&mut wtxn, &key)?;
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
Ok(Some(meta))
|
||||
}
|
||||
|
||||
/// Aborts all the pending updates, and not the one being currently processed.
|
||||
/// Returns the update metas and ids that were successfully aborted.
|
||||
pub fn abort_pendings(&self) -> heed::Result<Vec<(u64, M)>>
|
||||
where
|
||||
M: Serialize + for<'a> Deserialize<'a>,
|
||||
{
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let mut aborted_updates = Vec::new();
|
||||
|
||||
// We skip the first pending update as it is currently being processed.
|
||||
for result in self.pending_meta.iter(&wtxn)?.skip(1) {
|
||||
let (key, meta) = result?;
|
||||
let id = key.get();
|
||||
aborted_updates.push((id, meta));
|
||||
}
|
||||
|
||||
for (id, meta) in &aborted_updates {
|
||||
let key = BEU64::new(*id);
|
||||
self.aborted_meta.put(&mut wtxn, &key, &meta)?;
|
||||
self.pending_meta.delete(&mut wtxn, &key)?;
|
||||
self.pending.delete(&mut wtxn, &key)?;
|
||||
}
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
Ok(aborted_updates)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Hash)]
|
||||
pub enum UpdateStatusMeta<M, N> {
|
||||
Pending(M),
|
||||
Processed(N),
|
||||
Aborted(M),
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::thread;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn simple() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let options = EnvOpenOptions::new();
|
||||
let update_store = UpdateStore::open(options, dir, |_id, meta: String, _content: &_| {
|
||||
Ok(meta + " processed")
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let meta = String::from("kiki");
|
||||
let update_id = update_store.register_update(&meta, &[]).unwrap();
|
||||
|
||||
thread::sleep(Duration::from_millis(100));
|
||||
|
||||
let meta = update_store.meta(update_id).unwrap().unwrap();
|
||||
assert_eq!(meta, UpdateStatusMeta::Processed(format!("kiki processed")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn long_running_update() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let options = EnvOpenOptions::new();
|
||||
let update_store = UpdateStore::open(options, dir, |_id, meta: String, _content: &_| {
|
||||
thread::sleep(Duration::from_millis(400));
|
||||
Ok(meta + " processed")
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let before_register = Instant::now();
|
||||
|
||||
let meta = String::from("kiki");
|
||||
let update_id_kiki = update_store.register_update(&meta, &[]).unwrap();
|
||||
assert!(before_register.elapsed() < Duration::from_millis(200));
|
||||
|
||||
let meta = String::from("coco");
|
||||
let update_id_coco = update_store.register_update(&meta, &[]).unwrap();
|
||||
assert!(before_register.elapsed() < Duration::from_millis(200));
|
||||
|
||||
let meta = String::from("cucu");
|
||||
let update_id_cucu = update_store.register_update(&meta, &[]).unwrap();
|
||||
assert!(before_register.elapsed() < Duration::from_millis(200));
|
||||
|
||||
thread::sleep(Duration::from_millis(400 * 3 + 100));
|
||||
|
||||
let meta = update_store.meta(update_id_kiki).unwrap().unwrap();
|
||||
assert_eq!(meta, UpdateStatusMeta::Processed(format!("kiki processed")));
|
||||
|
||||
let meta = update_store.meta(update_id_coco).unwrap().unwrap();
|
||||
assert_eq!(meta, UpdateStatusMeta::Processed(format!("coco processed")));
|
||||
|
||||
let meta = update_store.meta(update_id_cucu).unwrap().unwrap();
|
||||
assert_eq!(meta, UpdateStatusMeta::Processed(format!("cucu processed")));
|
||||
}
|
||||
}
|
@ -1,102 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="stylesheet" href="/bulma.min.css">
|
||||
<link rel="stylesheet" href="/bulma-prefers-dark.min.css">
|
||||
<link rel="stylesheet" href="/style.css">
|
||||
<script type="text/javascript" src="/jquery-3.4.1.min.js"></script>
|
||||
<script type="text/javascript" src="/filesize.min.js"></script>
|
||||
<title>{{ db_name }} | The milli engine</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<section class="hero is-light">
|
||||
<div class="hero-body">
|
||||
<div class="container">
|
||||
|
||||
<div class="columns is-flex is-centered mb-6">
|
||||
<figure class="image is-128x128">
|
||||
<img id="logo-white" src="logo-white.svg" alt="milli logo in white">
|
||||
<img id="logo-black" src="logo-black.svg" alt="milli logo in black">
|
||||
</figure>
|
||||
</div>
|
||||
|
||||
<nav class="level">
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<p class="heading">Database Name</p>
|
||||
<p class="title">{{ db_name }}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<p class="heading">Database Size</p>
|
||||
<p class="title" id="db-size">{{ db_size }}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<p class="heading">Number of Documents</p>
|
||||
<p class="title" id="docs-count">{{ docs_count }}</p>
|
||||
</div>
|
||||
<button id="display_facets" class="button is-success">display facets</button>
|
||||
</div>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="hero container">
|
||||
<div class="notification" style="border-radius: 0 0 4px 4px;">
|
||||
|
||||
<nav class="level">
|
||||
<!-- Left side -->
|
||||
<div class="level-left">
|
||||
<div class="level-item">
|
||||
<div class="field has-addons has-addons-right">
|
||||
<input id="query" class="input" type="text" autofocus placeholder="e.g. George Clooney">
|
||||
<input id="filters" class="input" type="text" placeholder="filters like released >= 1577836800">
|
||||
</div>
|
||||
</div>
|
||||
<div class="level-item"></div>
|
||||
</div>
|
||||
|
||||
<!-- Right side -->
|
||||
<nav class="level-right">
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<p class="heading">Candidates</p>
|
||||
<p id="count" class="title">0</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="level-item has-text-centered">
|
||||
</div>
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<p class="heading">Time Spent</p>
|
||||
<p id="time" class="title">0ms</p>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
</nav>
|
||||
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section id="facets">
|
||||
<!-- facet values -->
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<ol id="results" class="content">
|
||||
<!-- documents matching requests -->
|
||||
</ol>
|
||||
</section>
|
||||
|
||||
</body>
|
||||
|
||||
<script type="text/javascript" src="/script.js"></script>
|
||||
|
||||
</html>
|
@ -1,95 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="stylesheet" href="/bulma.min.css">
|
||||
<link rel="stylesheet" href="/bulma-prefers-dark.min.css">
|
||||
<link rel="stylesheet" href="/style.css">
|
||||
<script type="text/javascript" src="/jquery-3.4.1.min.js"></script>
|
||||
<script type="text/javascript" src="/filesize.min.js"></script>
|
||||
<title>{{ db_name }} | Updates</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<section class="hero is-light">
|
||||
<div class="hero-body">
|
||||
<div class="container">
|
||||
|
||||
<a href="/">
|
||||
<div class="columns is-flex is-centered mb-6">
|
||||
<figure class="image is-128x128">
|
||||
<img id="logo-white" src="logo-white.svg" alt="milli logo in white">
|
||||
<img id="logo-black" src="logo-black.svg" alt="milli logo in black">
|
||||
</figure>
|
||||
</div>
|
||||
</a>
|
||||
|
||||
<nav class="level">
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<p class="heading">Database Name</p>
|
||||
<p class="title">{{ db_name }}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<p class="heading">Database Size</p>
|
||||
<p class="title" id="db-size">{{ db_size }}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<p class="heading">Number of Documents</p>
|
||||
<p class="title" id="docs-count">{{ docs_count }}</p>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<ol id="results" class="content">
|
||||
|
||||
{% for update in updates %}
|
||||
{% match update %}
|
||||
{% when UpdateStatus::Pending with { update_id, meta } %}
|
||||
<li id="update-{{ update_id }}" class="document">
|
||||
<ol>
|
||||
<li class="field">
|
||||
<div class="attribute">update id</div><div class="updateId content">{{ update_id }}</div>
|
||||
<div class="attribute">update status</div><div class="updateStatus content">pending</div>
|
||||
</li>
|
||||
</ol>
|
||||
</li>
|
||||
{% when UpdateStatus::Processed with { update_id, meta } %}
|
||||
<li id="update-{{ update_id }}" class="document">
|
||||
<ol>
|
||||
<li class="field">
|
||||
<div class="attribute">update id</div><div class="updateId content">{{ update_id }}</div>
|
||||
<div class="attribute">update status</div><div class="updateStatus content">{{ meta }}</div>
|
||||
</li>
|
||||
</ol>
|
||||
</li>
|
||||
{% when UpdateStatus::Aborted with { update_id, meta } %}
|
||||
<li id="update-{{ update_id }}" class="document">
|
||||
<ol>
|
||||
<li class="field">
|
||||
<div class="attribute">update id</div><div class="updateId content">{{ update_id }}</div>
|
||||
<div class="attribute">update status</div><div class="updateStatus content">aborted</div>
|
||||
</li>
|
||||
</ol>
|
||||
</li>
|
||||
{% else %}
|
||||
{% endmatch %}
|
||||
{% endfor %}
|
||||
|
||||
</ol>
|
||||
</section>
|
||||
|
||||
</body>
|
||||
|
||||
<script type="text/javascript" src="/updates-script.js"></script>
|
||||
|
||||
</html>
|
@ -1,17 +0,0 @@
|
||||
[package]
|
||||
name = "infos"
|
||||
version = "0.33.4"
|
||||
authors = ["Clément Renault <clement@meilisearch.com>"]
|
||||
edition = "2018"
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.56"
|
||||
byte-unit = { version = "4.0.14", default-features = false, features = ["std"] }
|
||||
csv = "1.1.6"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.29", default-features = false }
|
||||
roaring = "0.9.0"
|
||||
serde_json = "1.0.79"
|
||||
stderrlog = "0.5.1"
|
||||
structopt = { version = "0.3.26", default-features = false }
|
1221
infos/src/main.rs
1221
infos/src/main.rs
File diff suppressed because it is too large
Load Diff
@ -1,26 +0,0 @@
|
||||
# Milli
|
||||
|
||||
## Fuzzing milli
|
||||
|
||||
Currently you can only fuzz the indexation.
|
||||
To execute the fuzzer run:
|
||||
```
|
||||
cargo +nightly fuzz run indexing
|
||||
```
|
||||
|
||||
To execute the fuzzer on multiple thread you can also run:
|
||||
```
|
||||
cargo +nightly fuzz run -j4 indexing
|
||||
```
|
||||
|
||||
Since the fuzzer is going to create a lot of temporary file to let milli index its documents
|
||||
I would also recommand to execute it on a ramdisk.
|
||||
Here is how to setup a ramdisk on linux:
|
||||
```
|
||||
sudo mount -t tmpfs none path/to/your/ramdisk
|
||||
```
|
||||
And then set the [TMPDIR](https://doc.rust-lang.org/std/env/fn.temp_dir.html) environment variable
|
||||
to make the fuzzer create its file in it:
|
||||
```
|
||||
export TMPDIR=path/to/your/ramdisk
|
||||
```
|
5
milli/fuzz/.gitignore
vendored
5
milli/fuzz/.gitignore
vendored
@ -1,5 +0,0 @@
|
||||
Cargo.lock
|
||||
target/
|
||||
|
||||
/corpus/
|
||||
/artifacts/
|
@ -1,34 +0,0 @@
|
||||
[package]
|
||||
name = "milli-fuzz"
|
||||
version = "0.0.0"
|
||||
authors = ["Automatically generated"]
|
||||
publish = false
|
||||
edition = "2018"
|
||||
|
||||
[package.metadata]
|
||||
cargo-fuzz = true
|
||||
|
||||
[dependencies]
|
||||
arbitrary = "1.0"
|
||||
libfuzzer-sys = "0.4"
|
||||
serde_json = { version = "1.0.62", features = ["preserve_order"] }
|
||||
anyhow = "1.0"
|
||||
tempfile = "3.3"
|
||||
arbitrary-json = "0.1.0"
|
||||
mimalloc = { version = "0.1.29", default-features = false }
|
||||
|
||||
[dependencies.milli]
|
||||
path = ".."
|
||||
|
||||
# Prevent this from interfering with workspaces
|
||||
[workspace]
|
||||
members = ["."]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
|
||||
[[bin]]
|
||||
name = "indexing"
|
||||
path = "fuzz_targets/indexing.rs"
|
||||
test = false
|
||||
doc = false
|
@ -1,114 +0,0 @@
|
||||
#![no_main]
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::io::{BufWriter, Cursor, Read, Seek, Write};
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use arbitrary_json::ArbitraryValue;
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use milli::heed::EnvOpenOptions;
|
||||
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||
use milli::{Index, Object};
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
/// reads json from input and write an obkv batch to writer.
|
||||
pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
|
||||
let writer = BufWriter::new(writer);
|
||||
let mut builder = DocumentsBatchBuilder::new(writer);
|
||||
|
||||
let values: Vec<Object> = serde_json::from_reader(input)?;
|
||||
if builder.documents_count() == 0 {
|
||||
bail!("Empty payload");
|
||||
}
|
||||
|
||||
for object in values {
|
||||
builder.append_json_object(&object)?;
|
||||
}
|
||||
|
||||
let count = builder.documents_count();
|
||||
let vector = builder.into_inner()?;
|
||||
|
||||
Ok(count as usize)
|
||||
}
|
||||
|
||||
fn index_documents(
|
||||
index: &mut milli::Index,
|
||||
documents: DocumentsBatchReader<Cursor<Vec<u8>>>,
|
||||
) -> Result<()> {
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn()?;
|
||||
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())?;
|
||||
builder.add_documents(documents)?;
|
||||
builder.execute().unwrap();
|
||||
|
||||
wtxn.commit()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_index() -> Result<milli::Index> {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024 * 1024); // 10 GB
|
||||
options.max_readers(1);
|
||||
let index = Index::new(options, dir.path())?;
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
let displayed_fields =
|
||||
["id", "title", "album", "artist", "genre", "country", "released", "duration"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields = ["title", "album", "artist"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let faceted_fields: HashSet<String> =
|
||||
["released-timestamp", "duration-float", "genre", "country", "artist"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_filterable_fields(faceted_fields.clone());
|
||||
builder.set_sortable_fields(faceted_fields);
|
||||
|
||||
builder.set_distinct_field("same".to_string());
|
||||
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
fuzz_target!(|batches: Vec<Vec<ArbitraryValue>>| {
|
||||
if let Ok(mut index) = create_index() {
|
||||
for batch in batches {
|
||||
let documents: Vec<Value> =
|
||||
batch.into_iter().map(|value| serde_json::Value::from(value)).collect();
|
||||
let json = Value::Array(documents);
|
||||
let json = serde_json::to_string(&json).unwrap();
|
||||
|
||||
let mut documents = Cursor::new(Vec::new());
|
||||
|
||||
// We ignore all malformed documents
|
||||
if let Ok(_) = read_json(json.as_bytes(), &mut documents) {
|
||||
documents.rewind().unwrap();
|
||||
let documents = DocumentsBatchReader::from_reader(documents).unwrap();
|
||||
// A lot of errors can come out of milli and we don't know which ones are normal or not
|
||||
// so we are only going to look for the unexpected panics.
|
||||
let _ = index_documents(&mut index, documents);
|
||||
}
|
||||
}
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
}
|
||||
});
|
Loading…
x
Reference in New Issue
Block a user