1use std::{
2 collections::HashMap,
3 env, fs,
4 path::{Path, PathBuf},
5 sync::LazyLock,
6};
7
8use log::{error, info};
9use rand::Rng;
10use regex::Regex;
11use tree_sitter::{Language, Parser};
12
13pub mod allocations;
14pub mod corpus_test;
15pub mod edits;
16pub mod random;
17pub mod scope_sequence;
18
19use crate::{
20 fuzz::{
21 corpus_test::{
22 check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
23 },
24 edits::{get_random_edit, invert_edit},
25 random::Rand,
26 },
27 parse::perform_edit,
28 test::{parse_tests, strip_sexp_fields, DiffKey, TestDiff, TestEntry},
29};
30
31pub static LOG_ENABLED: LazyLock<bool> = LazyLock::new(|| env::var("TREE_SITTER_LOG").is_ok());
32
33pub static LOG_GRAPH_ENABLED: LazyLock<bool> =
34 LazyLock::new(|| env::var("TREE_SITTER_LOG_GRAPHS").is_ok());
35
36pub static LANGUAGE_FILTER: LazyLock<Option<String>> =
37 LazyLock::new(|| env::var("TREE_SITTER_LANGUAGE").ok());
38
39pub static EXAMPLE_INCLUDE: LazyLock<Option<Regex>> =
40 LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE"));
41
42pub static EXAMPLE_EXCLUDE: LazyLock<Option<Regex>> =
43 LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE"));
44
45pub static START_SEED: LazyLock<usize> = LazyLock::new(new_seed);
46
47pub const DEFAULT_EDIT_COUNT: usize = 3;
48pub static EDIT_COUNT: LazyLock<usize> =
49 LazyLock::new(|| int_env_var("TREE_SITTER_EDITS").unwrap_or(DEFAULT_EDIT_COUNT));
50
51pub const DEFAULT_ITERATION_COUNT: usize = 10;
52pub static ITERATION_COUNT: LazyLock<usize> =
53 LazyLock::new(|| int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(DEFAULT_ITERATION_COUNT));
54
/// Reads the environment variable `name` and parses it as a `usize`.
///
/// Returns `None` when the variable cannot be read (unset or not valid Unicode) or when its
/// value does not parse as a `usize`.
fn int_env_var(name: &'static str) -> Option<usize> {
    let raw = env::var(name).ok()?;
    raw.parse::<usize>().ok()
}
58
59fn regex_env_var(name: &'static str) -> Option<Regex> {
60 env::var(name).ok().and_then(|e| Regex::new(&e).ok())
61}
62
63#[must_use]
64pub fn new_seed() -> usize {
65 int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
66 let mut rng = rand::thread_rng();
67 let seed = rng.gen::<usize>();
68 info!("Seed: {seed}");
69 seed
70 })
71}
72
73pub struct FuzzOptions {
74 pub skipped: Option<Vec<String>>,
75 pub subdir: Option<PathBuf>,
76 pub edits: usize,
77 pub iterations: usize,
78 pub include: Option<Regex>,
79 pub exclude: Option<Regex>,
80 pub log_graphs: bool,
81 pub log: bool,
82}
83
84pub fn fuzz_language_corpus(
85 language: &Language,
86 language_name: &str,
87 start_seed: usize,
88 grammar_dir: &Path,
89 options: &mut FuzzOptions,
90) {
91 fn retain(entry: &mut TestEntry, language_name: &str) -> bool {
92 match entry {
93 TestEntry::Example { attributes, .. } => {
94 attributes.languages[0].is_empty()
95 || attributes
96 .languages
97 .iter()
98 .any(|lang| lang.as_ref() == language_name)
99 }
100 TestEntry::Group {
101 ref mut children, ..
102 } => {
103 children.retain_mut(|child| retain(child, language_name));
104 !children.is_empty()
105 }
106 }
107 }
108
109 let subdir = options.subdir.take().unwrap_or_default();
110
111 let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");
112
113 if !corpus_dir.exists() || !corpus_dir.is_dir() {
114 error!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
115 return;
116 }
117
118 if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
119 error!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
120 return;
121 }
122
123 let mut main_tests = parse_tests(&corpus_dir).unwrap();
124 match main_tests {
125 TestEntry::Group {
126 ref mut children, ..
127 } => {
128 children.retain_mut(|child| retain(child, language_name));
129 }
130 TestEntry::Example { .. } => unreachable!(),
131 }
132 let tests = flatten_tests(
133 main_tests,
134 options.include.as_ref(),
135 options.exclude.as_ref(),
136 );
137
138 let get_test_name = |test: &FlattenedTest| format!("{language_name} - {}", test.name);
139
140 let mut skipped = options
141 .skipped
142 .take()
143 .unwrap_or_default()
144 .into_iter()
145 .chain(tests.iter().filter(|x| x.skip).map(get_test_name))
146 .map(|x| (x, 0))
147 .collect::<HashMap<String, usize>>();
148
149 let mut failure_count = 0;
150
151 let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
152 let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
153
154 if log_seed {
155 info!(" start seed: {start_seed}");
156 }
157
158 println!();
159 for (test_index, test) in tests.iter().enumerate() {
160 let test_name = get_test_name(test);
161 if let Some(counter) = skipped.get_mut(test_name.as_str()) {
162 println!(" {test_index}. {test_name} - SKIPPED");
163 *counter += 1;
164 continue;
165 }
166
167 println!(" {test_index}. {test_name}");
168
169 let passed = allocations::record_checked(|| {
170 let mut log_session = None;
171 let mut parser = get_parser(&mut log_session, "log.html");
172 parser.set_language(language).unwrap();
173 set_included_ranges(&mut parser, &test.input, test.template_delimiters);
174
175 let tree = parser.parse(&test.input, None).unwrap();
176
177 if test.error {
178 return true;
179 }
180
181 let mut actual_output = tree.root_node().to_sexp();
182 if !test.has_fields {
183 actual_output = strip_sexp_fields(&actual_output);
184 }
185
186 if actual_output != test.output {
187 println!("Incorrect initial parse for {test_name}");
188 DiffKey::print();
189 println!("{}", TestDiff::new(&actual_output, &test.output));
190 println!();
191 return false;
192 }
193
194 true
195 })
196 .unwrap_or_else(|e| {
197 error!("{e}");
198 false
199 });
200
201 if !passed {
202 failure_count += 1;
203 continue;
204 }
205
206 let mut parser = Parser::new();
207 parser.set_language(language).unwrap();
208 let tree = parser.parse(&test.input, None).unwrap();
209 drop(parser);
210
211 for trial in 0..options.iterations {
212 let seed = start_seed + trial;
213 let passed = allocations::record_checked(|| {
214 let mut rand = Rand::new(seed);
215 let mut log_session = None;
216 let mut parser = get_parser(&mut log_session, "log.html");
217 parser.set_language(language).unwrap();
218 let mut tree = tree.clone();
219 let mut input = test.input.clone();
220
221 if options.log_graphs {
222 info!("{}\n", String::from_utf8_lossy(&input));
223 }
224
225 let edit_count = rand.unsigned(options.edits);
227 let mut undo_stack = Vec::with_capacity(edit_count);
228 for _ in 0..=edit_count {
229 let edit = get_random_edit(&mut rand, &input);
230 undo_stack.push(invert_edit(&input, &edit));
231 perform_edit(&mut tree, &mut input, &edit).unwrap();
232 }
233
234 if log_seed {
235 info!(" {test_index}.{trial:<2} seed: {seed}");
236 }
237
238 if dump_edits {
239 fs::create_dir_all("fuzz").unwrap();
240 fs::write(
241 Path::new("fuzz")
242 .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
243 &input,
244 )
245 .unwrap();
246 }
247
248 if options.log_graphs {
249 info!("{}\n", String::from_utf8_lossy(&input));
250 }
251
252 set_included_ranges(&mut parser, &input, test.template_delimiters);
253 let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
254
255 check_consistent_sizes(&tree2, &input);
257 if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
258 error!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
259 return false;
260 }
261
262 while let Some(edit) = undo_stack.pop() {
264 perform_edit(&mut tree2, &mut input, &edit).unwrap();
265 }
266 if options.log_graphs {
267 info!("{}\n", String::from_utf8_lossy(&input));
268 }
269
270 set_included_ranges(&mut parser, &test.input, test.template_delimiters);
271 let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
272
273 let mut actual_output = tree3.root_node().to_sexp();
275 if !test.has_fields {
276 actual_output = strip_sexp_fields(&actual_output);
277 }
278
279 if actual_output != test.output && !test.error {
280 println!("Incorrect parse for {test_name} - seed {seed}");
281 DiffKey::print();
282 println!("{}", TestDiff::new(&actual_output, &test.output));
283 println!();
284 return false;
285 }
286
287 check_consistent_sizes(&tree3, &input);
289 if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
290 error!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
291 return false;
292 }
293
294 true
295 }).unwrap_or_else(|e| {
296 error!("{e}");
297 false
298 });
299
300 if !passed {
301 failure_count += 1;
302 break;
303 }
304 }
305 }
306
307 if failure_count != 0 {
308 info!("{failure_count} {language_name} corpus tests failed fuzzing");
309 }
310
311 skipped.retain(|_, v| *v == 0);
312
313 if !skipped.is_empty() {
314 info!("Non matchable skip definitions:");
315 for k in skipped.keys() {
316 info!(" {k}");
317 }
318 panic!("Non matchable skip definitions need to be removed");
319 }
320}
321
/// A single corpus example whose enclosing groups have been folded into its name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FlattenedTest {
    /// Example name, prefixed with the names of its enclosing groups joined by `" - "`.
    pub name: String,
    /// Source bytes handed to the parser.
    pub input: Vec<u8>,
    /// Expected S-expression that the parsed tree is compared against.
    pub output: String,
    /// Languages taken from the example's attributes.
    pub languages: Vec<Box<str>>,
    /// `error` attribute of the example; when set, the parse output is not compared.
    pub error: bool,
    /// `skip` attribute of the example; when set, the test is not fuzzed.
    pub skip: bool,
    /// When `false`, field names are stripped from the actual output before comparison.
    pub has_fields: bool,
    /// Delimiter pair forwarded to `set_included_ranges`, if any.
    pub template_delimiters: Option<(&'static str, &'static str)>,
}
332
333#[must_use]
334pub fn flatten_tests(
335 test: TestEntry,
336 include: Option<&Regex>,
337 exclude: Option<&Regex>,
338) -> Vec<FlattenedTest> {
339 fn helper(
340 test: TestEntry,
341 include: Option<&Regex>,
342 exclude: Option<&Regex>,
343 is_root: bool,
344 prefix: &str,
345 result: &mut Vec<FlattenedTest>,
346 ) {
347 match test {
348 TestEntry::Example {
349 mut name,
350 input,
351 output,
352 has_fields,
353 attributes,
354 ..
355 } => {
356 if !prefix.is_empty() {
357 name.insert_str(0, " - ");
358 name.insert_str(0, prefix);
359 }
360
361 if let Some(include) = include {
362 if !include.is_match(&name) {
363 return;
364 }
365 } else if let Some(exclude) = exclude {
366 if exclude.is_match(&name) {
367 return;
368 }
369 }
370
371 result.push(FlattenedTest {
372 name,
373 input,
374 output,
375 has_fields,
376 languages: attributes.languages,
377 error: attributes.error,
378 skip: attributes.skip,
379 template_delimiters: None,
380 });
381 }
382 TestEntry::Group {
383 mut name, children, ..
384 } => {
385 if !is_root && !prefix.is_empty() {
386 name.insert_str(0, " - ");
387 name.insert_str(0, prefix);
388 }
389 for child in children {
390 helper(child, include, exclude, false, &name, result);
391 }
392 }
393 }
394 }
395 let mut result = Vec::new();
396 helper(test, include, exclude, true, "", &mut result);
397 result
398}