1use std::{collections::HashMap, env, fs, path::Path, sync::LazyLock};
2
3use rand::Rng;
4use regex::Regex;
5use tree_sitter::{Language, Parser};
6
7pub mod allocations;
8pub mod corpus_test;
9pub mod edits;
10pub mod random;
11pub mod scope_sequence;
12
13use crate::{
14 fuzz::{
15 corpus_test::{
16 check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
17 },
18 edits::{get_random_edit, invert_edit},
19 random::Rand,
20 },
21 parse::perform_edit,
22 test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
23};
24
/// `true` when the `TREE_SITTER_LOG` environment variable can be read (set and valid Unicode).
/// The lookup runs once, on first access.
pub static LOG_ENABLED: LazyLock<bool> = LazyLock::new(|| env::var("TREE_SITTER_LOG").is_ok());
26
/// `true` when the `TREE_SITTER_LOG_GRAPHS` environment variable can be read (set and valid
/// Unicode). The lookup runs once, on first access.
pub static LOG_GRAPH_ENABLED: LazyLock<bool> =
    LazyLock::new(|| env::var("TREE_SITTER_LOG_GRAPHS").is_ok());
29
/// Value of the `TREE_SITTER_LANGUAGE` environment variable, or `None` when it cannot be read.
pub static LANGUAGE_FILTER: LazyLock<Option<String>> =
    LazyLock::new(|| env::var("TREE_SITTER_LANGUAGE").ok());
32
/// Regex compiled from the `TREE_SITTER_EXAMPLE_INCLUDE` environment variable.
/// `None` when the variable cannot be read or does not compile as a regex.
pub static EXAMPLE_INCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE"));
35
/// Regex compiled from the `TREE_SITTER_EXAMPLE_EXCLUDE` environment variable.
/// `None` when the variable cannot be read or does not compile as a regex.
pub static EXAMPLE_EXCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE"));
38
/// Seed produced by `new_seed` the first time this static is accessed, then reused.
pub static START_SEED: LazyLock<usize> = LazyLock::new(new_seed);
40
/// Edit count read from the `TREE_SITTER_EDITS` environment variable.
/// Falls back to 3 when the variable cannot be read or is not a valid `usize`.
pub static EDIT_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_EDITS").unwrap_or(3));
43
/// Iteration count read from the `TREE_SITTER_ITERATIONS` environment variable.
/// Falls back to 10 when the variable cannot be read or is not a valid `usize`.
pub static ITERATION_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10));
46
/// Reads the environment variable `name` and parses it as a `usize`.
///
/// Returns `None` when the variable cannot be read or its value is not a valid `usize`.
fn int_env_var(name: &'static str) -> Option<usize> {
    let raw = env::var(name).ok()?;
    raw.parse::<usize>().ok()
}
50
51fn regex_env_var(name: &'static str) -> Option<Regex> {
52 env::var(name).ok().and_then(|e| Regex::new(&e).ok())
53}
54
55#[must_use]
56pub fn new_seed() -> usize {
57 int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
58 let mut rng = rand::thread_rng();
59 let seed = rng.gen::<usize>();
60 eprintln!("Seed: {seed}");
61 seed
62 })
63}
64
/// Settings consumed by `fuzz_language_corpus`.
pub struct FuzzOptions {
    /// Names of tests to skip. `fuzz_language_corpus` takes this list and compares its entries
    /// with names of the form `"{language_name} - {test name}"`; an entry that matches no test
    /// makes it panic at the end of the run.
    pub skipped: Option<Vec<String>>,
    /// Directory joined onto the grammar directory before `test/corpus`. Taken by
    /// `fuzz_language_corpus`; `None` is treated as an empty path.
    pub subdir: Option<String>,
    /// Requested number of edits per trial.
    // NOTE(review): confirm against `fuzz_language_corpus` how this relates to `EDIT_COUNT`.
    pub edits: usize,
    /// Number of trials run for each test by `fuzz_language_corpus`.
    pub iterations: usize,
    /// When set, only tests whose flattened name matches are kept by `flatten_tests`.
    pub include: Option<Regex>,
    /// When set and `include` is `None`, tests whose flattened name matches are dropped by
    /// `flatten_tests`.
    pub exclude: Option<Regex>,
    /// When `true`, `fuzz_language_corpus` prints the input text to stderr before the edits,
    /// after the edits, and after the edits are undone.
    pub log_graphs: bool,
    // NOTE(review): not read anywhere in the visible part of this file; presumably enables
    // parser logging in the caller — confirm.
    pub log: bool,
}
75
76pub fn fuzz_language_corpus(
77 language: &Language,
78 language_name: &str,
79 start_seed: usize,
80 grammar_dir: &Path,
81 options: &mut FuzzOptions,
82) {
83 fn retain(entry: &mut TestEntry, language_name: &str) -> bool {
84 match entry {
85 TestEntry::Example { attributes, .. } => {
86 attributes.languages[0].is_empty()
87 || attributes
88 .languages
89 .iter()
90 .any(|lang| lang.as_ref() == language_name)
91 }
92 TestEntry::Group {
93 ref mut children, ..
94 } => {
95 children.retain_mut(|child| retain(child, language_name));
96 !children.is_empty()
97 }
98 }
99 }
100
101 let subdir = options.subdir.take().unwrap_or_default();
102
103 let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");
104
105 if !corpus_dir.exists() || !corpus_dir.is_dir() {
106 eprintln!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
107 return;
108 }
109
110 if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
111 eprintln!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
112 return;
113 }
114
115 let mut main_tests = parse_tests(&corpus_dir).unwrap();
116 match main_tests {
117 TestEntry::Group {
118 ref mut children, ..
119 } => {
120 children.retain_mut(|child| retain(child, language_name));
121 }
122 TestEntry::Example { .. } => unreachable!(),
123 }
124 let tests = flatten_tests(
125 main_tests,
126 options.include.as_ref(),
127 options.exclude.as_ref(),
128 );
129
130 let get_test_name = |test: &FlattenedTest| format!("{language_name} - {}", test.name);
131
132 let mut skipped = options
133 .skipped
134 .take()
135 .unwrap_or_default()
136 .into_iter()
137 .chain(tests.iter().filter(|x| x.skip).map(get_test_name))
138 .map(|x| (x, 0))
139 .collect::<HashMap<String, usize>>();
140
141 let mut failure_count = 0;
142
143 let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
144 let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
145
146 if log_seed {
147 println!(" start seed: {start_seed}");
148 }
149
150 println!();
151 for (test_index, test) in tests.iter().enumerate() {
152 let test_name = get_test_name(test);
153 if let Some(counter) = skipped.get_mut(test_name.as_str()) {
154 println!(" {test_index}. {test_name} - SKIPPED");
155 *counter += 1;
156 continue;
157 }
158
159 println!(" {test_index}. {test_name}");
160
161 let passed = allocations::record(|| {
162 let mut log_session = None;
163 let mut parser = get_parser(&mut log_session, "log.html");
164 parser.set_language(language).unwrap();
165 set_included_ranges(&mut parser, &test.input, test.template_delimiters);
166
167 let tree = parser.parse(&test.input, None).unwrap();
168
169 if test.error {
170 return true;
171 }
172
173 let mut actual_output = tree.root_node().to_sexp();
174 if !test.has_fields {
175 actual_output = strip_sexp_fields(&actual_output);
176 }
177
178 if actual_output != test.output {
179 println!("Incorrect initial parse for {test_name}");
180 print_diff_key();
181 print_diff(&actual_output, &test.output, true);
182 println!();
183 return false;
184 }
185
186 true
187 })
188 .unwrap_or_else(|e| {
189 eprintln!("Error: {e}");
190 false
191 });
192
193 if !passed {
194 failure_count += 1;
195 continue;
196 }
197
198 let mut parser = Parser::new();
199 parser.set_language(language).unwrap();
200 let tree = parser.parse(&test.input, None).unwrap();
201 drop(parser);
202
203 for trial in 0..options.iterations {
204 let seed = start_seed + trial;
205 let passed = allocations::record(|| {
206 let mut rand = Rand::new(seed);
207 let mut log_session = None;
208 let mut parser = get_parser(&mut log_session, "log.html");
209 parser.set_language(language).unwrap();
210 let mut tree = tree.clone();
211 let mut input = test.input.clone();
212
213 if options.log_graphs {
214 eprintln!("{}\n", String::from_utf8_lossy(&input));
215 }
216
217 let edit_count = rand.unsigned(*EDIT_COUNT);
219 let mut undo_stack = Vec::with_capacity(edit_count);
220 for _ in 0..=edit_count {
221 let edit = get_random_edit(&mut rand, &input);
222 undo_stack.push(invert_edit(&input, &edit));
223 perform_edit(&mut tree, &mut input, &edit).unwrap();
224 }
225
226 if log_seed {
227 println!(" {test_index}.{trial:<2} seed: {seed}");
228 }
229
230 if dump_edits {
231 fs::create_dir_all("fuzz").unwrap();
232 fs::write(
233 Path::new("fuzz")
234 .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
235 &input,
236 )
237 .unwrap();
238 }
239
240 if options.log_graphs {
241 eprintln!("{}\n", String::from_utf8_lossy(&input));
242 }
243
244 set_included_ranges(&mut parser, &input, test.template_delimiters);
245 let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
246
247 check_consistent_sizes(&tree2, &input);
249 if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
250 println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
251 return false;
252 }
253
254 while let Some(edit) = undo_stack.pop() {
256 perform_edit(&mut tree2, &mut input, &edit).unwrap();
257 }
258 if options.log_graphs {
259 eprintln!("{}\n", String::from_utf8_lossy(&input));
260 }
261
262 set_included_ranges(&mut parser, &test.input, test.template_delimiters);
263 let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
264
265 let mut actual_output = tree3.root_node().to_sexp();
267 if !test.has_fields {
268 actual_output = strip_sexp_fields(&actual_output);
269 }
270
271 if actual_output != test.output && !test.error {
272 println!("Incorrect parse for {test_name} - seed {seed}");
273 print_diff_key();
274 print_diff(&actual_output, &test.output, true);
275 println!();
276 return false;
277 }
278
279 check_consistent_sizes(&tree3, &input);
281 if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
282 println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
283 return false;
284 }
285
286 true
287 }).unwrap_or_else(|e| {
288 eprintln!("Error: {e}");
289 false
290 });
291
292 if !passed {
293 failure_count += 1;
294 break;
295 }
296 }
297 }
298
299 if failure_count != 0 {
300 eprintln!("{failure_count} {language_name} corpus tests failed fuzzing");
301 }
302
303 skipped.retain(|_, v| *v == 0);
304
305 if !skipped.is_empty() {
306 println!("Non matchable skip definitions:");
307 for k in skipped.keys() {
308 println!(" {k}");
309 }
310 panic!("Non matchable skip definitions needs to be removed");
311 }
312}
313
/// A single corpus example produced by `flatten_tests`, with its group names folded into
/// `name`.
pub struct FlattenedTest {
    /// Example name, prefixed with the names of its enclosing groups joined by `" - "`.
    pub name: String,
    /// Source bytes that get parsed.
    pub input: Vec<u8>,
    /// Expected output; `fuzz_language_corpus` compares it with the root node's S-expression.
    pub output: String,
    /// Language list copied from the example's attributes.
    pub languages: Vec<Box<str>>,
    /// Copied from the example's attributes. When `true`, `fuzz_language_corpus` does not
    /// compare the parse result with `output`.
    pub error: bool,
    /// Copied from the example's attributes. When `true`, `fuzz_language_corpus` adds the test
    /// to its skip set.
    pub skip: bool,
    /// When `false`, `fuzz_language_corpus` strips fields from the actual S-expression before
    /// comparing it with `output`.
    pub has_fields: bool,
    /// Passed to `set_included_ranges` by `fuzz_language_corpus`. `flatten_tests` always sets
    /// this to `None`.
    pub template_delimiters: Option<(&'static str, &'static str)>,
}
324
325#[must_use]
326pub fn flatten_tests(
327 test: TestEntry,
328 include: Option<&Regex>,
329 exclude: Option<&Regex>,
330) -> Vec<FlattenedTest> {
331 fn helper(
332 test: TestEntry,
333 include: Option<&Regex>,
334 exclude: Option<&Regex>,
335 is_root: bool,
336 prefix: &str,
337 result: &mut Vec<FlattenedTest>,
338 ) {
339 match test {
340 TestEntry::Example {
341 mut name,
342 input,
343 output,
344 has_fields,
345 attributes,
346 ..
347 } => {
348 if !prefix.is_empty() {
349 name.insert_str(0, " - ");
350 name.insert_str(0, prefix);
351 }
352
353 if let Some(include) = include {
354 if !include.is_match(&name) {
355 return;
356 }
357 } else if let Some(exclude) = exclude {
358 if exclude.is_match(&name) {
359 return;
360 }
361 }
362
363 result.push(FlattenedTest {
364 name,
365 input,
366 output,
367 has_fields,
368 languages: attributes.languages,
369 error: attributes.error,
370 skip: attributes.skip,
371 template_delimiters: None,
372 });
373 }
374 TestEntry::Group {
375 mut name, children, ..
376 } => {
377 if !is_root && !prefix.is_empty() {
378 name.insert_str(0, " - ");
379 name.insert_str(0, prefix);
380 }
381 for child in children {
382 helper(child, include, exclude, false, &name, result);
383 }
384 }
385 }
386 }
387 let mut result = Vec::new();
388 helper(test, include, exclude, true, "", &mut result);
389 result
390}