1use std::{collections::HashMap, env, fs, path::Path, sync::LazyLock};
2
3use rand::Rng;
4use regex::Regex;
5use tree_sitter::{Language, Parser};
6
7pub mod allocations;
8pub mod corpus_test;
9pub mod edits;
10pub mod random;
11pub mod scope_sequence;
12
13use crate::{
14 fuzz::{
15 corpus_test::{
16 check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
17 },
18 edits::{get_random_edit, invert_edit},
19 random::Rand,
20 },
21 parse::perform_edit,
22 test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
23};
24
25pub static LOG_ENABLED: LazyLock<bool> = LazyLock::new(|| env::var("TREE_SITTER_LOG").is_ok());
26
27pub static LOG_GRAPH_ENABLED: LazyLock<bool> =
28 LazyLock::new(|| env::var("TREE_SITTER_LOG_GRAPHS").is_ok());
29
30pub static LANGUAGE_FILTER: LazyLock<Option<String>> =
31 LazyLock::new(|| env::var("TREE_SITTER_LANGUAGE").ok());
32
33pub static EXAMPLE_INCLUDE: LazyLock<Option<Regex>> =
34 LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE"));
35
36pub static EXAMPLE_EXCLUDE: LazyLock<Option<Regex>> =
37 LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE"));
38
39pub static START_SEED: LazyLock<usize> = LazyLock::new(new_seed);
40
41pub static EDIT_COUNT: LazyLock<usize> =
42 LazyLock::new(|| int_env_var("TREE_SITTER_EDITS").unwrap_or(3));
43
44pub static ITERATION_COUNT: LazyLock<usize> =
45 LazyLock::new(|| int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10));
46
/// Reads the environment variable `name` and parses it as a `usize`.
///
/// Returns `None` when the variable is unset, is not valid Unicode, or does
/// not parse as a `usize`. A value that is present but unparseable is reported
/// on stderr, so a typo does not silently fall back to a default.
fn int_env_var(name: &'static str) -> Option<usize> {
    let raw = env::var(name).ok()?;
    match raw.parse::<usize>() {
        Ok(value) => Some(value),
        Err(error) => {
            eprintln!("Warning: ignoring {name}={raw:?}: {error}");
            None
        }
    }
}
50
51fn regex_env_var(name: &'static str) -> Option<Regex> {
52 env::var(name).ok().and_then(|e| Regex::new(&e).ok())
53}
54
55#[must_use]
56pub fn new_seed() -> usize {
57 int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
58 let mut rng = rand::thread_rng();
59 rng.gen::<usize>()
60 })
61}
62
63pub struct FuzzOptions {
64 pub skipped: Option<Vec<String>>,
65 pub subdir: Option<String>,
66 pub edits: usize,
67 pub iterations: usize,
68 pub include: Option<Regex>,
69 pub exclude: Option<Regex>,
70 pub log_graphs: bool,
71 pub log: bool,
72}
73
74pub fn fuzz_language_corpus(
75 language: &Language,
76 language_name: &str,
77 start_seed: usize,
78 grammar_dir: &Path,
79 options: &mut FuzzOptions,
80) {
81 fn retain(entry: &mut TestEntry, language_name: &str) -> bool {
82 match entry {
83 TestEntry::Example { attributes, .. } => {
84 attributes.languages[0].is_empty()
85 || attributes
86 .languages
87 .iter()
88 .any(|lang| lang.as_ref() == language_name)
89 }
90 TestEntry::Group {
91 ref mut children, ..
92 } => {
93 children.retain_mut(|child| retain(child, language_name));
94 !children.is_empty()
95 }
96 }
97 }
98
99 let subdir = options.subdir.take().unwrap_or_default();
100
101 let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");
102
103 if !corpus_dir.exists() || !corpus_dir.is_dir() {
104 eprintln!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
105 return;
106 }
107
108 if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
109 eprintln!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
110 return;
111 }
112
113 let mut main_tests = parse_tests(&corpus_dir).unwrap();
114 match main_tests {
115 TestEntry::Group {
116 ref mut children, ..
117 } => {
118 children.retain_mut(|child| retain(child, language_name));
119 }
120 TestEntry::Example { .. } => unreachable!(),
121 }
122 let tests = flatten_tests(
123 main_tests,
124 options.include.as_ref(),
125 options.exclude.as_ref(),
126 );
127
128 let get_test_name = |test: &FlattenedTest| format!("{language_name} - {}", test.name);
129
130 let mut skipped = options
131 .skipped
132 .take()
133 .unwrap_or_default()
134 .into_iter()
135 .chain(tests.iter().filter(|x| x.skip).map(get_test_name))
136 .map(|x| (x, 0))
137 .collect::<HashMap<String, usize>>();
138
139 let mut failure_count = 0;
140
141 let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
142 let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
143
144 if log_seed {
145 println!(" start seed: {start_seed}");
146 }
147
148 println!();
149 for (test_index, test) in tests.iter().enumerate() {
150 let test_name = get_test_name(test);
151 if let Some(counter) = skipped.get_mut(test_name.as_str()) {
152 println!(" {test_index}. {test_name} - SKIPPED");
153 *counter += 1;
154 continue;
155 }
156
157 println!(" {test_index}. {test_name}");
158
159 let passed = allocations::record(|| {
160 let mut log_session = None;
161 let mut parser = get_parser(&mut log_session, "log.html");
162 parser.set_language(language).unwrap();
163 set_included_ranges(&mut parser, &test.input, test.template_delimiters);
164
165 let tree = parser.parse(&test.input, None).unwrap();
166
167 if test.error {
168 return true;
169 }
170
171 let mut actual_output = tree.root_node().to_sexp();
172 if !test.has_fields {
173 actual_output = strip_sexp_fields(&actual_output);
174 }
175
176 if actual_output != test.output {
177 println!("Incorrect initial parse for {test_name}");
178 print_diff_key();
179 print_diff(&actual_output, &test.output, true);
180 println!();
181 return false;
182 }
183
184 true
185 })
186 .unwrap_or_else(|e| {
187 eprintln!("Error: {e}");
188 false
189 });
190
191 if !passed {
192 failure_count += 1;
193 continue;
194 }
195
196 let mut parser = Parser::new();
197 parser.set_language(language).unwrap();
198 let tree = parser.parse(&test.input, None).unwrap();
199 drop(parser);
200
201 for trial in 0..options.iterations {
202 let seed = start_seed + trial;
203 let passed = allocations::record(|| {
204 let mut rand = Rand::new(seed);
205 let mut log_session = None;
206 let mut parser = get_parser(&mut log_session, "log.html");
207 parser.set_language(language).unwrap();
208 let mut tree = tree.clone();
209 let mut input = test.input.clone();
210
211 if options.log_graphs {
212 eprintln!("{}\n", String::from_utf8_lossy(&input));
213 }
214
215 let mut undo_stack = Vec::new();
217 for _ in 0..=rand.unsigned(*EDIT_COUNT) {
218 let edit = get_random_edit(&mut rand, &input);
219 undo_stack.push(invert_edit(&input, &edit));
220 perform_edit(&mut tree, &mut input, &edit).unwrap();
221 }
222
223 if log_seed {
224 println!(" {test_index}.{trial:<2} seed: {seed}");
225 }
226
227 if dump_edits {
228 fs::create_dir_all("fuzz").unwrap();
229 fs::write(
230 Path::new("fuzz")
231 .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
232 &input,
233 )
234 .unwrap();
235 }
236
237 if options.log_graphs {
238 eprintln!("{}\n", String::from_utf8_lossy(&input));
239 }
240
241 set_included_ranges(&mut parser, &input, test.template_delimiters);
242 let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
243
244 check_consistent_sizes(&tree2, &input);
246 if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
247 println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
248 return false;
249 }
250
251 while let Some(edit) = undo_stack.pop() {
253 perform_edit(&mut tree2, &mut input, &edit).unwrap();
254 }
255 if options.log_graphs {
256 eprintln!("{}\n", String::from_utf8_lossy(&input));
257 }
258
259 set_included_ranges(&mut parser, &test.input, test.template_delimiters);
260 let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
261
262 let mut actual_output = tree3.root_node().to_sexp();
264 if !test.has_fields {
265 actual_output = strip_sexp_fields(&actual_output);
266 }
267
268 if actual_output != test.output && !test.error {
269 println!("Incorrect parse for {test_name} - seed {seed}");
270 print_diff_key();
271 print_diff(&actual_output, &test.output, true);
272 println!();
273 return false;
274 }
275
276 check_consistent_sizes(&tree3, &input);
278 if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
279 println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
280 return false;
281 }
282
283 true
284 }).unwrap_or_else(|e| {
285 eprintln!("Error: {e}");
286 false
287 });
288
289 if !passed {
290 failure_count += 1;
291 break;
292 }
293 }
294 }
295
296 if failure_count != 0 {
297 eprintln!("{failure_count} {language_name} corpus tests failed fuzzing");
298 }
299
300 skipped.retain(|_, v| *v == 0);
301
302 if !skipped.is_empty() {
303 println!("Non matchable skip definitions:");
304 for k in skipped.keys() {
305 println!(" {k}");
306 }
307 panic!("Non matchable skip definitions needs to be removed");
308 }
309}
310
/// A single corpus example with the names of its enclosing groups folded
/// into its own name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FlattenedTest {
    /// Example name, prefixed by its enclosing group names joined with `" - "`.
    pub name: String,
    /// Source bytes handed to the parser.
    pub input: Vec<u8>,
    /// Expected S-expression that the parsed tree is compared against.
    pub output: String,
    /// Language names taken from the example's attributes.
    pub languages: Vec<Box<str>>,
    /// Taken from the example's `error` attribute. When set, a mismatch
    /// between the parsed and expected output is not reported as a failure.
    pub error: bool,
    /// Taken from the example's `skip` attribute. When set, the test is added
    /// to the skip list.
    pub skip: bool,
    /// When `false`, field names are stripped from the actual output before
    /// it is compared with `output`.
    pub has_fields: bool,
    /// Delimiter pair passed to `set_included_ranges`; `flatten_tests` always
    /// sets this to `None`.
    pub template_delimiters: Option<(&'static str, &'static str)>,
}
321
322#[must_use]
323pub fn flatten_tests(
324 test: TestEntry,
325 include: Option<&Regex>,
326 exclude: Option<&Regex>,
327) -> Vec<FlattenedTest> {
328 fn helper(
329 test: TestEntry,
330 include: Option<&Regex>,
331 exclude: Option<&Regex>,
332 is_root: bool,
333 prefix: &str,
334 result: &mut Vec<FlattenedTest>,
335 ) {
336 match test {
337 TestEntry::Example {
338 mut name,
339 input,
340 output,
341 has_fields,
342 attributes,
343 ..
344 } => {
345 if !prefix.is_empty() {
346 name.insert_str(0, " - ");
347 name.insert_str(0, prefix);
348 }
349
350 if let Some(include) = include {
351 if !include.is_match(&name) {
352 return;
353 }
354 } else if let Some(exclude) = exclude {
355 if exclude.is_match(&name) {
356 return;
357 }
358 }
359
360 result.push(FlattenedTest {
361 name,
362 input,
363 output,
364 has_fields,
365 languages: attributes.languages,
366 error: attributes.error,
367 skip: attributes.skip,
368 template_delimiters: None,
369 });
370 }
371 TestEntry::Group {
372 mut name, children, ..
373 } => {
374 if !is_root && !prefix.is_empty() {
375 name.insert_str(0, " - ");
376 name.insert_str(0, prefix);
377 }
378 for child in children {
379 helper(child, include, exclude, false, &name, result);
380 }
381 }
382 }
383 }
384 let mut result = Vec::new();
385 helper(test, include, exclude, true, "", &mut result);
386 result
387}