1use std::collections::HashMap;
2use std::fs;
3use std::os::unix::process::ExitStatusExt;
4use std::path::PathBuf;
5use std::time::Duration;
6use std::time::Instant;
7
8use anyhow::{Context, Result};
9use clap::Parser;
10use clap_verbosity_flag::{InfoLevel, Verbosity};
11use rand::Rng;
12use regex::Regex;
13use tree_sitter::Language;
14use tree_sitter::Tree;
15use tree_splicer::splice::{Config, Splicer};
16use treereduce::Check;
17use treereduce::CmdCheck;
18
19#[derive(Clone, Debug, clap::Parser)]
21#[command(author, version, about, long_about = None)]
22pub struct Args {
23 #[arg(help_heading = "Mutation options", short, long, default_value_t = 5)]
25 pub chaos: u8,
26
27 #[arg(help_heading = "Mutation options", long, default_value_t = 5)]
29 pub deletions: u8,
30
31 #[arg(help_heading = "Mutation options", long, default_value_t = 1048576)]
33 pub max_size: usize,
34
35 #[arg(help_heading = "Mutation options", short, long, default_value_t = 16)]
37 pub mutations: usize,
38
39 #[cfg(feature = "radamsa")]
41 #[arg(help_heading = "Mutation options", short, long)]
42 pub radamsa: bool,
43
44 #[arg(short, long)]
46 pub debug: bool,
47
48 #[arg(help_heading = "Interestingness check options",
50 long, default_values_t = Vec::<i32>::new(), value_name = "CODE")]
51 interesting_exit_code: Vec<i32>,
52
53 #[arg(
55 help_heading = "Interestingness check options",
56 long,
57 value_name = "REGEX"
58 )]
59 interesting_stdout: Option<String>,
60
61 #[arg(
63 help_heading = "Interestingness check options",
64 long,
65 value_name = "REGEX"
66 )]
67 interesting_stderr: Option<String>,
68
69 #[arg(
71 help_heading = "Interestingness check options",
72 long,
73 value_name = "REGEX",
74 requires = "interesting_stdout"
75 )]
76 uninteresting_stdout: Option<String>,
77
78 #[arg(
80 help_heading = "Interestingness check options",
81 long,
82 value_name = "REGEX",
83 requires = "interesting_stderr"
84 )]
85 uninteresting_stderr: Option<String>,
86
87 #[arg(short, long, default_value_t = num_cpus::get())]
89 pub jobs: usize,
90
91 #[arg(short, long, default_value_os = "tree-crasher.out")]
93 pub output: PathBuf,
94
95 #[arg(short, long, default_value_t = 0)]
97 pub seed: u64,
98
99 #[arg(long, default_value_t = 500)]
101 pub timeout: u64,
102
103 #[clap(flatten)]
104 verbose: Verbosity<InfoLevel>,
105
106 #[arg(value_name = "DIR", required = true)]
108 pub files: String,
109
110 #[arg(value_name = "CMD", required = true, num_args = 1..)]
112 pub check: Vec<String>,
113}
114
115fn read_file(file: &PathBuf) -> Result<String> {
116 fs::read_to_string(file).with_context(|| format!("Failed to read file {}", file.display()))
117}
118
119fn parse(language: Language, code: &str) -> Result<Tree> {
120 let mut parser = tree_sitter::Parser::new();
121 parser
122 .set_language(language)
123 .context("Failed to set tree-sitter parser language")?;
124 parser.parse(code, None).context("Failed to parse code")
125}
126
127#[allow(clippy::too_many_arguments)]
128fn make_check(
129 debug: bool,
130 timeout: Duration,
131 check: Vec<String>,
132 mut interesting_exit_codes: Vec<i32>,
133 interesting_stdout: Option<String>,
134 interesting_stderr: Option<String>,
135 uninteresting_stdout: Option<String>,
136 uninteresting_stderr: Option<String>,
137) -> Result<CmdCheck> {
138 if check.is_empty() {
139 eprintln!("Internal error: empty interestingness check!");
140 std::process::exit(1);
141 }
142 let mut argv: Vec<_> = check.iter().collect();
143 let cmd = argv[0];
144 argv.remove(0);
145 let stdout_regex = match &interesting_stdout {
146 Some(r) => Some(Regex::new(r).context("Invalid interesting stdout regex")?),
147 None => None,
148 };
149 let stderr_regex = match &interesting_stderr {
150 Some(r) => Some(Regex::new(r).context("Invalid interesting stderr regex")?),
151 None => None,
152 };
153 let un_stdout_regex = match &uninteresting_stdout {
154 Some(r) => Some(Regex::new(r).context("Invalid uninteresting stdout regex")?),
155 None => None,
156 };
157 let un_stderr_regex = match &uninteresting_stderr {
158 Some(r) => Some(Regex::new(r).context("Invalid uninteresting stderr regex")?),
159 None => None,
160 };
161 interesting_exit_codes.extend(128..256);
162 Ok(CmdCheck::new(
163 cmd.to_string(),
164 argv.iter().map(|s| s.to_string()).collect(),
165 interesting_exit_codes,
166 None,
167 stdout_regex,
168 stderr_regex,
169 un_stdout_regex,
170 un_stderr_regex,
171 debug,
172 debug,
173 Some(timeout),
174 ))
175}
176
177const BATCH: usize = 100_000; fn check(
180 language: Language,
181 node_types: &treereduce::NodeTypes,
182 chk: &CmdCheck,
183 inp: &[u8],
184) -> i32 {
185 let state = match chk.start(inp) {
186 Ok(s) => s,
187 Err(e) => {
188 eprintln!("Problem when running target: {e}");
189 return -1;
190 }
191 };
192 let (interesting, status, stdout, stderr) = chk.wait_with_output(state).unwrap();
193 let code = status.and_then(|s| s.code()).unwrap_or(-1);
194 let sig = status.and_then(|s| s.signal());
195 if interesting || sig.is_some() {
196 if let Some(s) = sig {
197 if s == 6 {
198 return code;
199 }
200 eprintln!("signal {s}!");
201 } else {
202 eprintln!("interesting!");
203 }
204 let mut rng = rand::thread_rng();
205 let i = rng.gen_range(0..10192);
206 std::fs::write(format!("tree-crasher-{i}.out"), inp).unwrap();
207 std::fs::write(format!("tree-crasher-{i}.stdout"), stdout).unwrap();
208 std::fs::write(format!("tree-crasher-{i}.stderr"), stderr).unwrap();
209 let tree = parse(language, &String::from_utf8_lossy(inp)).unwrap();
210 match treereduce::treereduce_multi_pass(
211 language,
212 node_types,
213 treereduce::Original::new(tree, inp.to_vec()),
214 &treereduce::Config {
215 check: chk.clone(),
216 delete_non_optional: true,
217 jobs: 1,
218 min_reduction: 2,
219 replacements: HashMap::new(),
220 },
221 Some(8),
222 ) {
223 Err(e) => eprintln!("Failed to reduce! {e}"),
224 Ok((reduced, _)) => {
225 std::fs::write(format!("tree-crasher-{i}.reduced.out"), reduced.text).unwrap();
226 }
227 }
228 }
229 code
230}
231
232fn job(
234 language: Language,
235 node_types1: &treereduce::NodeTypes,
237 node_types2: &tree_splicer::node_types::NodeTypes,
238 args: &Args,
239 files: &HashMap<String, (Vec<u8>, Tree)>,
240 chk: CmdCheck,
241) {
242 if files.is_empty() {
243 eprintln!("No files provided.");
244 return;
245 }
246 #[cfg(feature = "radamsa")]
247 if args.radamsa {
248 unsafe { radamsa_sys::radamsa_init() };
249 let mut rng = rand::thread_rng();
250 let file_bytes: Vec<_> = files.values().map(|(bytes, _tree)| bytes).collect();
251 loop {
252 const MAX_SIZE: usize = 4096;
253 let mut input: Vec<u8> = file_bytes
255 .get(rng.gen_range(0..files.len()))
256 .unwrap()
257 .to_vec();
258 let mut mutant = vec![0u8; MAX_SIZE];
259 let out_len = unsafe {
260 radamsa_sys::radamsa(
261 input.as_mut_ptr(),
262 input.len(),
263 mutant.as_mut_ptr(),
264 MAX_SIZE,
265 0,
266 )
267 };
268 assert!(out_len <= MAX_SIZE);
269 mutant.truncate(out_len);
270 check(language, node_types1, &chk, &mutant);
271 }
272 }
273 loop {
274 let config = Config {
275 chaos: args.chaos,
276 deletions: args.deletions,
277 language,
278 inter_splices: args.mutations,
280 node_types: node_types2.clone(),
281 max_size: args.max_size,
282 reparse: usize::MAX,
283 seed: args.seed,
284 };
285 let start = Instant::now();
286 let mut execs = 0;
287 for (i, out) in Splicer::new(config, files).enumerate() {
288 if i == BATCH {
289 break;
290 }
291 let _code = check(language, node_types1, &chk, &out);
292 execs += 1;
293 let secs = start.elapsed().as_secs();
294 if execs % 10_000 == 0 {
295 println!("execs/sec: {}", execs / secs);
296 }
297 }
298 }
299}
300
301pub fn main(language: Language, node_types_json_str: &'static str) -> Result<()> {
303 let args = Args::parse();
304 debug_assert!(args.interesting_stdout.is_some() || args.uninteresting_stdout.is_none());
305 debug_assert!(args.interesting_stderr.is_some() || args.uninteresting_stderr.is_none());
306
307 if args.debug {
308 eprintln!("Loading testcases...");
309 }
310 let mut files = HashMap::new();
311 for entry in fs::read_dir(&args.files)
313 .with_context(|| format!("When reading tests from {}", args.files))?
314 {
315 let entry = entry?;
316 let path = entry.path();
317 if let Ok(s) = read_file(&path) {
318 let tree = parse(language, &s)?;
319 files.insert(String::from(path.to_string_lossy()), (s.into_bytes(), tree));
320 }
321 }
322 let chk = make_check(
323 args.debug,
324 Duration::from_millis(args.timeout),
325 args.check.clone(),
326 args.interesting_exit_code.clone(),
327 args.interesting_stdout.clone(),
328 args.interesting_stderr.clone(),
329 args.uninteresting_stdout.clone(),
330 args.uninteresting_stderr.clone(),
331 )?;
332 let node_types1 = treereduce::NodeTypes::new(node_types_json_str).unwrap();
333 let node_types2 = tree_splicer::node_types::NodeTypes::new(node_types_json_str).unwrap();
334
335 if args.debug {
336 eprintln!("Spawning threads...");
337 }
338 #[cfg(not(feature = "radamsa"))]
339 let jobs = if args.debug { 1 } else { args.jobs };
340 #[cfg(feature = "radamsa")]
341 let jobs = if args.debug {
342 if args.jobs != 1 {
343 eprintln!("[WARN] Radamsa can only be used with one thread.");
344 }
345 1
346 } else {
347 args.jobs
348 };
349 std::thread::scope(|s| {
350 for _ in 0..jobs {
351 s.spawn(|| {
352 job(
353 language,
354 &node_types1,
355 &node_types2,
356 &args,
357 &files,
358 chk.clone(),
359 )
360 });
361 }
362 });
363
364 Ok(())
365}