2023-05-29 12:27:39 +02:00
use std::num::NonZeroUsize;
use std::path::PathBuf;
2023-05-29 13:44:32 +02:00
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
2023-05-29 12:27:39 +02:00
use std::time::Duration;
use arbitrary::{Arbitrary, Unstructured};
use clap::Parser;
use fuzzers::Operation;
use milli::heed::EnvOpenOptions;
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig};
use milli::Index;
use tempfile::TempDir;
/// A fixed-size group of five fuzzing [`Operation`]s.
///
/// `main` builds five of these at a time from random bytes through the
/// `Arbitrary` derive, then applies the operations of each batch one by one.
/// The `Debug` derive is what lets the generated input be pretty-printed.
#[derive(Debug, Arbitrary)]
struct Batch([Operation; 5]);
// Command-line options of the fuzzer, parsed through clap's `Parser` derive.
//
// The `///` comments on the fields are user-facing: clap's derive uses them as
// the help text of each option, so they are written for the person running
// the binary. Notes for maintainers are kept in plain `//` comments instead.
#[derive(Debug, Clone, Parser)]
struct Opt {
    // When omitted, `main` falls back to `std::thread::available_parallelism()`.
    /// The number of fuzzer to run in parallel.
    par: Option<NonZeroUsize>,
    // We need to put a lot of newlines in the following documentation or else everything gets collapsed on one line.
    // Each blank `///` line below starts a new paragraph in the rendered help.
    //
    // When a path is given, every worker creates its temporary directory inside
    // it; otherwise `TempDir::new()` picks the location.
    /// The path in which the databases will be created.
    /// Using a ramdisk is recommended.
    ///
    /// Linux:
    ///
    /// sudo mount -t tmpfs -o size=2g tmpfs ramdisk # to create it
    ///
    /// sudo umount ramdisk # to remove it
    ///
    /// MacOS:
    ///
    /// diskutil erasevolume HFS+ 'RAM Disk' `hdiutil attach -nobrowse -nomount ram://4194304` # create it
    ///
    /// hdiutil detach /dev/:the_disk
    path: Option<PathBuf>,
fn main() {
let opt = Opt::parse();
let progression: &'static AtomicUsize = Box::leak(Box::new(AtomicUsize::new(0)));
2023-05-29 13:44:32 +02:00
let stop: &'static AtomicBool = Box::leak(Box::new(AtomicBool::new(false)));
2023-05-29 12:27:39 +02:00
let par = opt.par.unwrap_or_else(|| std::thread::available_parallelism().unwrap()).get();
let mut handles = Vec::with_capacity(par);
for _ in 0..par {
let opt = opt.clone();
let handle = std::thread::spawn(move || {
let mut options = EnvOpenOptions::new();
options.map_size(1024 * 1024 * 1024 * 1024);
let tempdir = match opt.path {
Some(path) => TempDir::new_in(path).unwrap(),
None => TempDir::new().unwrap(),
let index = Index::new(options, tempdir.path()).unwrap();
let indexer_config = IndexerConfig::default();
let index_documents_config = IndexDocumentsConfig::default();
2023-05-29 13:39:26 +02:00
std::thread::scope(|s| {
loop {
2023-05-29 13:44:32 +02:00
if stop.load(Ordering::Relaxed) {
2023-05-29 13:39:26 +02:00
let v: Vec<u8> =
std::iter::repeat_with(|| fastrand::u8(..)).take(1000).collect();
2023-05-29 12:27:39 +02:00
2023-05-29 13:39:26 +02:00
let mut data = Unstructured::new(&v);
let batches = <[Batch; 5]>::arbitrary(&mut data).unwrap();
// will be used to display the error once a thread crashes
let dbg_input = format!("{:#?}", batches);
2023-05-29 12:27:39 +02:00
2023-05-29 13:39:26 +02:00
let handle = s.spawn(|| {
let mut wtxn = index.write_txn().unwrap();
2023-05-29 12:27:39 +02:00
2023-05-29 13:39:26 +02:00
for batch in batches {
let mut builder = IndexDocuments::new(
&mut wtxn,
|_| (),
|| false,
2023-05-29 12:27:39 +02:00
2023-05-29 13:39:26 +02:00
for op in batch.0 {
match op {
Operation::AddDoc(doc) => {
let documents =
let documents =
let (b, _added) = builder.add_documents(documents).unwrap();
builder = b;
Operation::DeleteDoc(id) => {
let (b, _removed) =
builder = b;
2023-05-29 12:27:39 +02:00
2023-05-29 13:39:26 +02:00
2023-05-29 12:27:39 +02:00
2023-05-29 13:39:26 +02:00
// after executing a batch we check if the database is corrupted
let res = index.search(&wtxn).execute().unwrap();
index.documents(&wtxn, res.documents_ids).unwrap();
progression.fetch_add(1, Ordering::Relaxed);
2023-11-22 18:21:19 +01:00
2023-05-29 13:39:26 +02:00
2023-05-29 13:44:32 +02:00
if let err @ Err(_) = handle.join() {
stop.store(true, Ordering::Relaxed);
2023-05-29 12:27:39 +02:00
2023-05-29 13:39:26 +02:00
2023-05-29 12:27:39 +02:00
std::thread::spawn(|| {
let mut last_value = 0;
let start = std::time::Instant::now();
loop {
let total = progression.load(Ordering::Relaxed);
2023-06-12 15:30:51 +02:00
let elapsed = start.elapsed().as_secs();
if elapsed > 3600 {
// after 1 hour, stop the fuzzer, success
2023-05-29 12:27:39 +02:00
2023-06-12 15:30:51 +02:00
"Has been running for {:?} seconds. Tested {} new values for a total of {}.",
2023-05-29 12:27:39 +02:00
total - last_value,
last_value = total;
for handle in handles {