move the fuzzer to its own crate

This commit is contained in:
Tamo 2023-05-29 12:27:39 +02:00
parent 002f42875f
commit 6c6387d05e
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
8 changed files with 531 additions and 396 deletions

708
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -13,7 +13,8 @@ members = [
"filter-parser",
"flatten-serde-json",
"json-depth-checker",
"benchmarks"
"benchmarks",
"fuzzers",
]
[workspace.package]

20
fuzzers/Cargo.toml Normal file
View File

@ -0,0 +1,20 @@
[package]
name = "fuzzers"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
arbitrary = { version = "1.3.0", features = ["derive"] }
clap = { version = "4.3.0", features = ["derive"] }
fastrand = "1.9.0"
milli = { path = "../milli" }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
tempfile = "3.5.0"

3
fuzzers/README.md Normal file
View File

@ -0,0 +1,3 @@
# Fuzzers
The purpose of this crate is to contains all the handmade "fuzzer" we may need.

136
fuzzers/src/bin/fuzz.rs Normal file
View File

@ -0,0 +1,136 @@
use std::num::NonZeroUsize;
use std::path::PathBuf;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::Duration;
use arbitrary::{Arbitrary, Unstructured};
use clap::Parser;
use fuzzers::Operation;
use milli::heed::EnvOpenOptions;
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig};
use milli::Index;
use tempfile::TempDir;
#[derive(Debug, Arbitrary)]
struct Batch([Operation; 5]);
#[derive(Debug, Clone, Parser)]
struct Opt {
/// The number of fuzzer to run in parallel.
#[clap(long)]
par: Option<NonZeroUsize>,
// We need to put a lot of newlines in the following documentation or else everything gets collapsed on one line
/// The path in which the databases will be created.
/// Using a ramdisk is recommended.
///
/// Linux:
///
/// sudo mount -t tmpfs -o size=2g tmpfs ramdisk # to create it
///
/// sudo umount ramdisk # to remove it
///
/// MacOS:
///
/// diskutil erasevolume HFS+ 'RAM Disk' `hdiutil attach -nobrowse -nomount ram://4194304 # create it
///
/// hdiutil detach /dev/:the_disk
///
#[clap(long)]
path: Option<PathBuf>,
}
fn main() {
let opt = Opt::parse();
let progression: &'static AtomicUsize = Box::leak(Box::new(AtomicUsize::new(0)));
let par = opt.par.unwrap_or_else(|| std::thread::available_parallelism().unwrap()).get();
let mut handles = Vec::with_capacity(par);
for _ in 0..par {
let opt = opt.clone();
let handle = std::thread::spawn(move || {
let mut options = EnvOpenOptions::new();
options.map_size(1024 * 1024 * 1024 * 1024);
let tempdir = match opt.path {
Some(path) => TempDir::new_in(path).unwrap(),
None => TempDir::new().unwrap(),
};
let index = Index::new(options, tempdir.path()).unwrap();
let indexer_config = IndexerConfig::default();
let index_documents_config = IndexDocumentsConfig::default();
loop {
let v: Vec<u8> = std::iter::repeat_with(|| fastrand::u8(..)).take(1000).collect();
let mut data = Unstructured::new(&v);
let batches = <[Batch; 5]>::arbitrary(&mut data).unwrap();
// will be used to display the error once a thread crashes
let dbg_input = format!("{:#?}", batches);
let mut wtxn = index.write_txn().unwrap();
for batch in batches {
let mut builder = IndexDocuments::new(
&mut wtxn,
&index,
&indexer_config,
index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
for op in batch.0 {
match op {
Operation::AddDoc(doc) => {
let documents =
milli::documents::objects_from_json_value(doc.to_d());
let documents =
milli::documents::documents_batch_reader_from_objects(
documents,
);
let (b, _added) =
builder.add_documents(documents).expect(&dbg_input);
builder = b;
}
Operation::DeleteDoc(id) => {
let (b, _removed) =
builder.remove_documents(vec![id.to_s()]).unwrap();
builder = b;
}
}
}
builder.execute().expect(&dbg_input);
// after executing a batch we check if the database is corrupted
let res = index.search(&wtxn).execute().expect(&dbg_input);
index.documents(&wtxn, res.documents_ids).expect(&dbg_input);
progression.fetch_add(1, Ordering::Relaxed);
}
wtxn.abort().unwrap();
}
});
handles.push(handle);
}
std::thread::spawn(|| {
let mut last_value = 0;
let start = std::time::Instant::now();
loop {
let total = progression.load(Ordering::Relaxed);
println!(
"Has been running for {:?}. Tested {} new values for a total of {}.",
start.elapsed(),
total - last_value,
total
);
last_value = total;
std::thread::sleep(Duration::from_secs(1));
}
});
for handle in handles {
handle.join().unwrap();
}
}

46
fuzzers/src/lib.rs Normal file
View File

@ -0,0 +1,46 @@
use arbitrary::Arbitrary;
use serde_json::{json, Value};
#[derive(Debug, Arbitrary)]
pub enum Document {
One,
Two,
Three,
Four,
Five,
Six,
}
impl Document {
pub fn to_d(&self) -> Value {
match self {
Document::One => json!({ "id": 0, "doggo": "bernese" }),
Document::Two => json!({ "id": 0, "doggo": "golden" }),
Document::Three => json!({ "id": 0, "catto": "jorts" }),
Document::Four => json!({ "id": 1, "doggo": "bernese" }),
Document::Five => json!({ "id": 1, "doggo": "golden" }),
Document::Six => json!({ "id": 1, "catto": "jorts" }),
}
}
}
#[derive(Debug, Arbitrary)]
pub enum DocId {
Zero,
One,
}
impl DocId {
pub fn to_s(&self) -> String {
match self {
DocId::Zero => "0".to_string(),
DocId::One => "1".to_string(),
}
}
}
#[derive(Debug, Arbitrary)]
pub enum Operation {
AddDoc(Document),
DeleteDoc(DocId),
}

View File

@ -65,13 +65,6 @@ maplit = "1.0.2"
md5 = "0.7.0"
rand = {version = "0.8.5", features = ["small_rng"] }
# fuzzing
arbitrary = { version = "1.3.0", features = ["derive"] }
fastrand = "1.9.0"
[target.'cfg(fuzzing)'.dev-dependencies]
fuzzcheck = "0.12.1"
[features]
all-tokenizations = ["charabia/default"]

View File

@ -52,7 +52,9 @@ enum Operation {
#[derive(Debug, Arbitrary)]
struct Batch([Operation; 5]);
fn main() {
#[test]
#[ignore]
fn fuzz() {
let mut options = EnvOpenOptions::new();
options.map_size(1024 * 1024 * 1024 * 1024);
let _tempdir = TempDir::new().unwrap();