1use std::{
2 collections::HashMap,
3 env, fs,
4 path::{Path, PathBuf},
5 sync::LazyLock,
6};
7
8use log::{error, info};
9use rand::Rng;
10use regex::Regex;
11use tree_sitter::{Language, Parser};
12
13pub mod allocations;
14pub mod corpus_test;
15pub mod edits;
16pub mod random;
17pub mod scope_sequence;
18
19use crate::{
20 fuzz::{
21 corpus_test::{
22 check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
23 },
24 edits::{get_random_edit, invert_edit},
25 random::Rand,
26 },
27 parse::perform_edit,
28 test::{parse_tests, strip_sexp_fields, DiffKey, TestDiff, TestEntry},
29};
30
/// `true` when the `TREE_SITTER_LOG` environment variable can be read
/// (i.e. it is set to a valid Unicode value).
pub static LOG_ENABLED: LazyLock<bool> = LazyLock::new(|| env::var("TREE_SITTER_LOG").is_ok());

/// `true` when the `TREE_SITTER_LOG_GRAPHS` environment variable can be read.
pub static LOG_GRAPH_ENABLED: LazyLock<bool> =
    LazyLock::new(|| env::var("TREE_SITTER_LOG_GRAPHS").is_ok());

/// Value of the `TREE_SITTER_LANGUAGE` environment variable, or `None` when it
/// cannot be read.
pub static LANGUAGE_FILTER: LazyLock<Option<String>> =
    LazyLock::new(|| env::var("TREE_SITTER_LANGUAGE").ok());

/// Regex compiled from `TREE_SITTER_EXAMPLE_INCLUDE`; `None` when the variable
/// is unset or does not hold a valid pattern.
pub static EXAMPLE_INCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE"));

/// Regex compiled from `TREE_SITTER_EXAMPLE_EXCLUDE`; `None` when the variable
/// is unset or does not hold a valid pattern.
pub static EXAMPLE_EXCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE"));

/// Seed computed once, on first access, by [`new_seed`].
pub static START_SEED: LazyLock<usize> = LazyLock::new(new_seed);

/// Integer read from `TREE_SITTER_EDITS`, defaulting to `3` when the variable
/// is unset or not a valid `usize`.
pub static EDIT_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_EDITS").unwrap_or(3));

/// Integer read from `TREE_SITTER_ITERATIONS`, defaulting to `10` when the
/// variable is unset or not a valid `usize`.
pub static ITERATION_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10));
52
/// Reads the environment variable `name` and parses it as a `usize`.
///
/// Returns `None` when the variable cannot be read or its value does not
/// parse as a `usize`.
fn int_env_var(name: &'static str) -> Option<usize> {
    let raw = env::var(name).ok()?;
    raw.parse().ok()
}
56
57fn regex_env_var(name: &'static str) -> Option<Regex> {
58 env::var(name).ok().and_then(|e| Regex::new(&e).ok())
59}
60
61#[must_use]
62pub fn new_seed() -> usize {
63 int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
64 let mut rng = rand::thread_rng();
65 let seed = rng.gen::<usize>();
66 info!("Seed: {seed}");
67 seed
68 })
69}
70
/// Settings consumed by [`fuzz_language_corpus`].
pub struct FuzzOptions {
    /// Full test names (`"<language name> - <test name>"`) to skip.
    /// `fuzz_language_corpus` takes this value, leaving `None` behind.
    pub skipped: Option<Vec<String>>,
    /// Directory, relative to the grammar directory, that contains
    /// `test/corpus`. Taken by `fuzz_language_corpus`; `None` means the
    /// grammar directory itself.
    pub subdir: Option<PathBuf>,
    /// Edit budget for each fuzzing iteration — presumably the per-run
    /// counterpart of [`EDIT_COUNT`]; verify against the caller.
    pub edits: usize,
    /// Number of edit/undo trials run for each corpus test.
    pub iterations: usize,
    /// Forwarded to [`flatten_tests`] as its `include` filter.
    pub include: Option<Regex>,
    /// Forwarded to [`flatten_tests`] as its `exclude` filter.
    pub exclude: Option<Regex>,
    /// When `true`, the test input is logged before the edits, after the
    /// edits, and after the edits are undone.
    pub log_graphs: bool,
    /// NOTE(review): presumably enables parse logging; confirm against the
    /// code that consumes it.
    pub log: bool,
}
81
82pub fn fuzz_language_corpus(
83 language: &Language,
84 language_name: &str,
85 start_seed: usize,
86 grammar_dir: &Path,
87 options: &mut FuzzOptions,
88) {
89 fn retain(entry: &mut TestEntry, language_name: &str) -> bool {
90 match entry {
91 TestEntry::Example { attributes, .. } => {
92 attributes.languages[0].is_empty()
93 || attributes
94 .languages
95 .iter()
96 .any(|lang| lang.as_ref() == language_name)
97 }
98 TestEntry::Group {
99 ref mut children, ..
100 } => {
101 children.retain_mut(|child| retain(child, language_name));
102 !children.is_empty()
103 }
104 }
105 }
106
107 let subdir = options.subdir.take().unwrap_or_default();
108
109 let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");
110
111 if !corpus_dir.exists() || !corpus_dir.is_dir() {
112 error!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
113 return;
114 }
115
116 if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
117 error!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
118 return;
119 }
120
121 let mut main_tests = parse_tests(&corpus_dir).unwrap();
122 match main_tests {
123 TestEntry::Group {
124 ref mut children, ..
125 } => {
126 children.retain_mut(|child| retain(child, language_name));
127 }
128 TestEntry::Example { .. } => unreachable!(),
129 }
130 let tests = flatten_tests(
131 main_tests,
132 options.include.as_ref(),
133 options.exclude.as_ref(),
134 );
135
136 let get_test_name = |test: &FlattenedTest| format!("{language_name} - {}", test.name);
137
138 let mut skipped = options
139 .skipped
140 .take()
141 .unwrap_or_default()
142 .into_iter()
143 .chain(tests.iter().filter(|x| x.skip).map(get_test_name))
144 .map(|x| (x, 0))
145 .collect::<HashMap<String, usize>>();
146
147 let mut failure_count = 0;
148
149 let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
150 let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
151
152 if log_seed {
153 info!(" start seed: {start_seed}");
154 }
155
156 println!();
157 for (test_index, test) in tests.iter().enumerate() {
158 let test_name = get_test_name(test);
159 if let Some(counter) = skipped.get_mut(test_name.as_str()) {
160 println!(" {test_index}. {test_name} - SKIPPED");
161 *counter += 1;
162 continue;
163 }
164
165 println!(" {test_index}. {test_name}");
166
167 let passed = allocations::record_checked(|| {
168 let mut log_session = None;
169 let mut parser = get_parser(&mut log_session, "log.html");
170 parser.set_language(language).unwrap();
171 set_included_ranges(&mut parser, &test.input, test.template_delimiters);
172
173 let tree = parser.parse(&test.input, None).unwrap();
174
175 if test.error {
176 return true;
177 }
178
179 let mut actual_output = tree.root_node().to_sexp();
180 if !test.has_fields {
181 actual_output = strip_sexp_fields(&actual_output);
182 }
183
184 if actual_output != test.output {
185 println!("Incorrect initial parse for {test_name}");
186 DiffKey::print();
187 println!("{}", TestDiff::new(&actual_output, &test.output));
188 println!();
189 return false;
190 }
191
192 true
193 })
194 .unwrap_or_else(|e| {
195 error!("{e}");
196 false
197 });
198
199 if !passed {
200 failure_count += 1;
201 continue;
202 }
203
204 let mut parser = Parser::new();
205 parser.set_language(language).unwrap();
206 let tree = parser.parse(&test.input, None).unwrap();
207 drop(parser);
208
209 for trial in 0..options.iterations {
210 let seed = start_seed + trial;
211 let passed = allocations::record_checked(|| {
212 let mut rand = Rand::new(seed);
213 let mut log_session = None;
214 let mut parser = get_parser(&mut log_session, "log.html");
215 parser.set_language(language).unwrap();
216 let mut tree = tree.clone();
217 let mut input = test.input.clone();
218
219 if options.log_graphs {
220 info!("{}\n", String::from_utf8_lossy(&input));
221 }
222
223 let edit_count = rand.unsigned(*EDIT_COUNT);
225 let mut undo_stack = Vec::with_capacity(edit_count);
226 for _ in 0..=edit_count {
227 let edit = get_random_edit(&mut rand, &input);
228 undo_stack.push(invert_edit(&input, &edit));
229 perform_edit(&mut tree, &mut input, &edit).unwrap();
230 }
231
232 if log_seed {
233 info!(" {test_index}.{trial:<2} seed: {seed}");
234 }
235
236 if dump_edits {
237 fs::create_dir_all("fuzz").unwrap();
238 fs::write(
239 Path::new("fuzz")
240 .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
241 &input,
242 )
243 .unwrap();
244 }
245
246 if options.log_graphs {
247 info!("{}\n", String::from_utf8_lossy(&input));
248 }
249
250 set_included_ranges(&mut parser, &input, test.template_delimiters);
251 let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
252
253 check_consistent_sizes(&tree2, &input);
255 if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
256 error!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
257 return false;
258 }
259
260 while let Some(edit) = undo_stack.pop() {
262 perform_edit(&mut tree2, &mut input, &edit).unwrap();
263 }
264 if options.log_graphs {
265 info!("{}\n", String::from_utf8_lossy(&input));
266 }
267
268 set_included_ranges(&mut parser, &test.input, test.template_delimiters);
269 let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
270
271 let mut actual_output = tree3.root_node().to_sexp();
273 if !test.has_fields {
274 actual_output = strip_sexp_fields(&actual_output);
275 }
276
277 if actual_output != test.output && !test.error {
278 println!("Incorrect parse for {test_name} - seed {seed}");
279 DiffKey::print();
280 println!("{}", TestDiff::new(&actual_output, &test.output));
281 println!();
282 return false;
283 }
284
285 check_consistent_sizes(&tree3, &input);
287 if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
288 error!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
289 return false;
290 }
291
292 true
293 }).unwrap_or_else(|e| {
294 error!("{e}");
295 false
296 });
297
298 if !passed {
299 failure_count += 1;
300 break;
301 }
302 }
303 }
304
305 if failure_count != 0 {
306 info!("{failure_count} {language_name} corpus tests failed fuzzing");
307 }
308
309 skipped.retain(|_, v| *v == 0);
310
311 if !skipped.is_empty() {
312 info!("Non matchable skip definitions:");
313 for k in skipped.keys() {
314 info!(" {k}");
315 }
316 panic!("Non matchable skip definitions need to be removed");
317 }
318}
319
/// A single corpus example produced by [`flatten_tests`], with the names of
/// its enclosing groups folded into `name`.
pub struct FlattenedTest {
    /// Example name, prefixed with the enclosing group names joined by `" - "`.
    pub name: String,
    /// Raw bytes of the source text to parse.
    pub input: Vec<u8>,
    /// Expected S-expression; `fuzz_language_corpus` compares it against the
    /// parsed root node's `to_sexp()` output.
    pub output: String,
    /// Copied from the example's `languages` attribute.
    pub languages: Vec<Box<str>>,
    /// Copied from the example's `error` attribute. When set,
    /// `fuzz_language_corpus` does not compare the parse output with `output`.
    pub error: bool,
    /// Copied from the example's `skip` attribute. `fuzz_language_corpus`
    /// skips such tests.
    pub skip: bool,
    /// When `false`, `fuzz_language_corpus` passes the actual output through
    /// `strip_sexp_fields` before comparing it with `output`.
    pub has_fields: bool,
    /// Passed to `set_included_ranges` by `fuzz_language_corpus`;
    /// `flatten_tests` always sets it to `None`.
    pub template_delimiters: Option<(&'static str, &'static str)>,
}
330
331#[must_use]
332pub fn flatten_tests(
333 test: TestEntry,
334 include: Option<&Regex>,
335 exclude: Option<&Regex>,
336) -> Vec<FlattenedTest> {
337 fn helper(
338 test: TestEntry,
339 include: Option<&Regex>,
340 exclude: Option<&Regex>,
341 is_root: bool,
342 prefix: &str,
343 result: &mut Vec<FlattenedTest>,
344 ) {
345 match test {
346 TestEntry::Example {
347 mut name,
348 input,
349 output,
350 has_fields,
351 attributes,
352 ..
353 } => {
354 if !prefix.is_empty() {
355 name.insert_str(0, " - ");
356 name.insert_str(0, prefix);
357 }
358
359 if let Some(include) = include {
360 if !include.is_match(&name) {
361 return;
362 }
363 } else if let Some(exclude) = exclude {
364 if exclude.is_match(&name) {
365 return;
366 }
367 }
368
369 result.push(FlattenedTest {
370 name,
371 input,
372 output,
373 has_fields,
374 languages: attributes.languages,
375 error: attributes.error,
376 skip: attributes.skip,
377 template_delimiters: None,
378 });
379 }
380 TestEntry::Group {
381 mut name, children, ..
382 } => {
383 if !is_root && !prefix.is_empty() {
384 name.insert_str(0, " - ");
385 name.insert_str(0, prefix);
386 }
387 for child in children {
388 helper(child, include, exclude, false, &name, result);
389 }
390 }
391 }
392 }
393 let mut result = Vec::new();
394 helper(test, include, exclude, true, "", &mut result);
395 result
396}