tree_crasher/
lib.rs

1use std::collections::HashMap;
2use std::fs;
3use std::os::unix::process::ExitStatusExt;
4use std::path::PathBuf;
5use std::time::Duration;
6use std::time::Instant;
7
8use anyhow::{Context, Result};
9use clap::Parser;
10use clap_verbosity_flag::{InfoLevel, Verbosity};
11use rand::Rng;
12use regex::Regex;
13use tree_sitter::Language;
14use tree_sitter::Tree;
15use tree_splicer::splice::{Config, Splicer};
16use treereduce::Check;
17use treereduce::CmdCheck;
18
19/// An easy-to-use grammar-based black-box fuzzer
20#[derive(Clone, Debug, clap::Parser)]
21#[command(author, version, about, long_about = None)]
22pub struct Args {
23    /// Percent of "chaotic" mutations - may introduce syntax errors
24    #[arg(help_heading = "Mutation options", short, long, default_value_t = 5)]
25    pub chaos: u8,
26
27    /// Percent of deletion mutations - the rest are splices
28    #[arg(help_heading = "Mutation options", long, default_value_t = 5)]
29    pub deletions: u8,
30
31    /// Approximate maximum file size to produce (bytes); default = 1MiB
32    #[arg(help_heading = "Mutation options", long, default_value_t = 1048576)]
33    pub max_size: usize,
34
35    /// Number of mutations per test
36    #[arg(help_heading = "Mutation options", short, long, default_value_t = 16)]
37    pub mutations: usize,
38
39    /// Use Radamsa for mutations; ignore all other mutation options
40    #[cfg(feature = "radamsa")]
41    #[arg(help_heading = "Mutation options", short, long)]
42    pub radamsa: bool,
43
44    /// Run a single thread and show stdout, stderr of target
45    #[arg(short, long)]
46    pub debug: bool,
47
48    /// Exit code to consider interesting
49    #[arg(help_heading = "Interestingness check options",
50          long, default_values_t = Vec::<i32>::new(), value_name = "CODE")]
51    interesting_exit_code: Vec<i32>,
52
53    /// Regex to match interesting stdout
54    #[arg(
55        help_heading = "Interestingness check options",
56        long,
57        value_name = "REGEX"
58    )]
59    interesting_stdout: Option<String>,
60
61    /// Regex to match interesting stderr
62    #[arg(
63        help_heading = "Interestingness check options",
64        long,
65        value_name = "REGEX"
66    )]
67    interesting_stderr: Option<String>,
68
69    /// Regex to match *uninteresting* stdout, overrides interesting regex
70    #[arg(
71        help_heading = "Interestingness check options",
72        long,
73        value_name = "REGEX",
74        requires = "interesting_stdout"
75    )]
76    uninteresting_stdout: Option<String>,
77
78    /// Regex to match *uninteresting* stderr, overrides interesting regex
79    #[arg(
80        help_heading = "Interestingness check options",
81        long,
82        value_name = "REGEX",
83        requires = "interesting_stderr"
84    )]
85    uninteresting_stderr: Option<String>,
86
87    /// Number of threads
88    #[arg(short, long, default_value_t = num_cpus::get())]
89    pub jobs: usize,
90
91    /// Directory to output to
92    #[arg(short, long, default_value_os = "tree-crasher.out")]
93    pub output: PathBuf,
94
95    /// Seed
96    #[arg(short, long, default_value_t = 0)]
97    pub seed: u64,
98
99    /// Timeout (ms)
100    #[arg(long, default_value_t = 500)]
101    pub timeout: u64,
102
103    #[clap(flatten)]
104    verbose: Verbosity<InfoLevel>,
105
106    /// Input files
107    #[arg(value_name = "DIR", required = true)]
108    pub files: String,
109
110    /// Interestingness check; fed test case on stdin or via '@@' file
111    #[arg(value_name = "CMD", required = true, num_args = 1..)]
112    pub check: Vec<String>,
113}
114
115fn read_file(file: &PathBuf) -> Result<String> {
116    fs::read_to_string(file).with_context(|| format!("Failed to read file {}", file.display()))
117}
118
119fn parse(language: Language, code: &str) -> Result<Tree> {
120    let mut parser = tree_sitter::Parser::new();
121    parser
122        .set_language(language)
123        .context("Failed to set tree-sitter parser language")?;
124    parser.parse(code, None).context("Failed to parse code")
125}
126
127#[allow(clippy::too_many_arguments)]
128fn make_check(
129    debug: bool,
130    timeout: Duration,
131    check: Vec<String>,
132    mut interesting_exit_codes: Vec<i32>,
133    interesting_stdout: Option<String>,
134    interesting_stderr: Option<String>,
135    uninteresting_stdout: Option<String>,
136    uninteresting_stderr: Option<String>,
137) -> Result<CmdCheck> {
138    if check.is_empty() {
139        eprintln!("Internal error: empty interestingness check!");
140        std::process::exit(1);
141    }
142    let mut argv: Vec<_> = check.iter().collect();
143    let cmd = argv[0];
144    argv.remove(0);
145    let stdout_regex = match &interesting_stdout {
146        Some(r) => Some(Regex::new(r).context("Invalid interesting stdout regex")?),
147        None => None,
148    };
149    let stderr_regex = match &interesting_stderr {
150        Some(r) => Some(Regex::new(r).context("Invalid interesting stderr regex")?),
151        None => None,
152    };
153    let un_stdout_regex = match &uninteresting_stdout {
154        Some(r) => Some(Regex::new(r).context("Invalid uninteresting stdout regex")?),
155        None => None,
156    };
157    let un_stderr_regex = match &uninteresting_stderr {
158        Some(r) => Some(Regex::new(r).context("Invalid uninteresting stderr regex")?),
159        None => None,
160    };
161    interesting_exit_codes.extend(128..256);
162    Ok(CmdCheck::new(
163        cmd.to_string(),
164        argv.iter().map(|s| s.to_string()).collect(),
165        interesting_exit_codes,
166        None,
167        stdout_regex,
168        stderr_regex,
169        un_stdout_regex,
170        un_stderr_regex,
171        debug,
172        debug,
173        Some(timeout),
174    ))
175}
176
/// Upper bound on the number of test cases pulled from one splicer before it
/// is rebuilt; the cases are produced lazily, not all materialized at once.
const BATCH: usize = 100_000;
178
179fn check(
180    language: Language,
181    node_types: &treereduce::NodeTypes,
182    chk: &CmdCheck,
183    inp: &[u8],
184) -> i32 {
185    let state = match chk.start(inp) {
186        Ok(s) => s,
187        Err(e) => {
188            eprintln!("Problem when running target: {e}");
189            return -1;
190        }
191    };
192    let (interesting, status, stdout, stderr) = chk.wait_with_output(state).unwrap();
193    let code = status.and_then(|s| s.code()).unwrap_or(-1);
194    let sig = status.and_then(|s| s.signal());
195    if interesting || sig.is_some() {
196        if let Some(s) = sig {
197            if s == 6 {
198                return code;
199            }
200            eprintln!("signal {s}!");
201        } else {
202            eprintln!("interesting!");
203        }
204        let mut rng = rand::thread_rng();
205        let i = rng.gen_range(0..10192);
206        std::fs::write(format!("tree-crasher-{i}.out"), inp).unwrap();
207        std::fs::write(format!("tree-crasher-{i}.stdout"), stdout).unwrap();
208        std::fs::write(format!("tree-crasher-{i}.stderr"), stderr).unwrap();
209        let tree = parse(language, &String::from_utf8_lossy(inp)).unwrap();
210        match treereduce::treereduce_multi_pass(
211            language,
212            node_types,
213            treereduce::Original::new(tree, inp.to_vec()),
214            &treereduce::Config {
215                check: chk.clone(),
216                delete_non_optional: true,
217                jobs: 1,
218                min_reduction: 2,
219                replacements: HashMap::new(),
220            },
221            Some(8),
222        ) {
223            Err(e) => eprintln!("Failed to reduce! {e}"),
224            Ok((reduced, _)) => {
225                std::fs::write(format!("tree-crasher-{i}.reduced.out"), reduced.text).unwrap();
226            }
227        }
228    }
229    code
230}
231
232// TODO: print executions/sec
233fn job(
234    language: Language,
235    // HACK: there should be another crate that deals with this...
236    node_types1: &treereduce::NodeTypes,
237    node_types2: &tree_splicer::node_types::NodeTypes,
238    args: &Args,
239    files: &HashMap<String, (Vec<u8>, Tree)>,
240    chk: CmdCheck,
241) {
242    if files.is_empty() {
243        eprintln!("No files provided.");
244        return;
245    }
246    #[cfg(feature = "radamsa")]
247    if args.radamsa {
248        unsafe { radamsa_sys::radamsa_init() };
249        let mut rng = rand::thread_rng();
250        let file_bytes: Vec<_> = files.values().map(|(bytes, _tree)| bytes).collect();
251        loop {
252            const MAX_SIZE: usize = 4096;
253            // TODO: Mutate in-place
254            let mut input: Vec<u8> = file_bytes
255                .get(rng.gen_range(0..files.len()))
256                .unwrap()
257                .to_vec();
258            let mut mutant = vec![0u8; MAX_SIZE];
259            let out_len = unsafe {
260                radamsa_sys::radamsa(
261                    input.as_mut_ptr(),
262                    input.len(),
263                    mutant.as_mut_ptr(),
264                    MAX_SIZE,
265                    0,
266                )
267            };
268            assert!(out_len <= MAX_SIZE);
269            mutant.truncate(out_len);
270            check(language, node_types1, &chk, &mutant);
271        }
272    }
273    loop {
274        let config = Config {
275            chaos: args.chaos,
276            deletions: args.deletions,
277            language,
278            // intra_splices: 10,
279            inter_splices: args.mutations,
280            node_types: node_types2.clone(),
281            max_size: args.max_size,
282            reparse: usize::MAX,
283            seed: args.seed,
284        };
285        let start = Instant::now();
286        let mut execs = 0;
287        for (i, out) in Splicer::new(config, files).enumerate() {
288            if i == BATCH {
289                break;
290            }
291            let _code = check(language, node_types1, &chk, &out);
292            execs += 1;
293            let secs = start.elapsed().as_secs();
294            if execs % 10_000 == 0 {
295                println!("execs/sec: {}", execs / secs);
296            }
297        }
298    }
299}
300
301// TODO: graceful exit
302pub fn main(language: Language, node_types_json_str: &'static str) -> Result<()> {
303    let args = Args::parse();
304    debug_assert!(args.interesting_stdout.is_some() || args.uninteresting_stdout.is_none());
305    debug_assert!(args.interesting_stderr.is_some() || args.uninteresting_stderr.is_none());
306
307    if args.debug {
308        eprintln!("Loading testcases...");
309    }
310    let mut files = HashMap::new();
311    // TODO error messages
312    for entry in fs::read_dir(&args.files)
313        .with_context(|| format!("When reading tests from {}", args.files))?
314    {
315        let entry = entry?;
316        let path = entry.path();
317        if let Ok(s) = read_file(&path) {
318            let tree = parse(language, &s)?;
319            files.insert(String::from(path.to_string_lossy()), (s.into_bytes(), tree));
320        }
321    }
322    let chk = make_check(
323        args.debug,
324        Duration::from_millis(args.timeout),
325        args.check.clone(),
326        args.interesting_exit_code.clone(),
327        args.interesting_stdout.clone(),
328        args.interesting_stderr.clone(),
329        args.uninteresting_stdout.clone(),
330        args.uninteresting_stderr.clone(),
331    )?;
332    let node_types1 = treereduce::NodeTypes::new(node_types_json_str).unwrap();
333    let node_types2 = tree_splicer::node_types::NodeTypes::new(node_types_json_str).unwrap();
334
335    if args.debug {
336        eprintln!("Spawning threads...");
337    }
338    #[cfg(not(feature = "radamsa"))]
339    let jobs = if args.debug { 1 } else { args.jobs };
340    #[cfg(feature = "radamsa")]
341    let jobs = if args.debug {
342        if args.jobs != 1 {
343            eprintln!("[WARN] Radamsa can only be used with one thread.");
344        }
345        1
346    } else {
347        args.jobs
348    };
349    std::thread::scope(|s| {
350        for _ in 0..jobs {
351            s.spawn(|| {
352                job(
353                    language,
354                    &node_types1,
355                    &node_types2,
356                    &args,
357                    &files,
358                    chk.clone(),
359                )
360            });
361        }
362    });
363
364    Ok(())
365}