tree_sitter_cli/
fuzz.rs

1use std::{
2    collections::HashMap,
3    env, fs,
4    path::{Path, PathBuf},
5    sync::LazyLock,
6};
7
8use log::{error, info};
9use rand::Rng;
10use regex::Regex;
11use tree_sitter::{Language, Parser};
12
13pub mod allocations;
14pub mod corpus_test;
15pub mod edits;
16pub mod random;
17pub mod scope_sequence;
18
19use crate::{
20    fuzz::{
21        corpus_test::{
22            check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
23        },
24        edits::{get_random_edit, invert_edit},
25        random::Rand,
26    },
27    parse::perform_edit,
28    test::{parse_tests, strip_sexp_fields, DiffKey, TestDiff, TestEntry},
29};
30
/// `true` when the `TREE_SITTER_LOG` environment variable is set to any
/// (valid Unicode) value.
pub static LOG_ENABLED: LazyLock<bool> = LazyLock::new(|| env::var("TREE_SITTER_LOG").is_ok());

/// `true` when the `TREE_SITTER_LOG_GRAPHS` environment variable is set to any
/// (valid Unicode) value.
pub static LOG_GRAPH_ENABLED: LazyLock<bool> =
    LazyLock::new(|| env::var("TREE_SITTER_LOG_GRAPHS").is_ok());

/// Value of the `TREE_SITTER_LANGUAGE` environment variable, if it is set.
pub static LANGUAGE_FILTER: LazyLock<Option<String>> =
    LazyLock::new(|| env::var("TREE_SITTER_LANGUAGE").ok());

/// Regex compiled from `TREE_SITTER_EXAMPLE_INCLUDE`; `None` when the variable
/// is unset or does not hold a valid pattern.
pub static EXAMPLE_INCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE"));

/// Regex compiled from `TREE_SITTER_EXAMPLE_EXCLUDE`; `None` when the variable
/// is unset or does not hold a valid pattern.
pub static EXAMPLE_EXCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE"));

/// Seed computed once, on first access, by [`new_seed`].
pub static START_SEED: LazyLock<usize> = LazyLock::new(new_seed);

/// Integer read from `TREE_SITTER_EDITS`, falling back to `3` when the
/// variable is unset or not a valid `usize`.
pub static EDIT_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_EDITS").unwrap_or(3));

/// Integer read from `TREE_SITTER_ITERATIONS`, falling back to `10` when the
/// variable is unset or not a valid `usize`.
pub static ITERATION_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10));
52
/// Reads the environment variable `name` and parses it as a `usize`.
///
/// Returns `None` when the variable is unset, is not valid Unicode, or does
/// not parse as a `usize`.
fn int_env_var(name: &'static str) -> Option<usize> {
    let raw = env::var(name).ok()?;
    raw.parse().ok()
}
56
57fn regex_env_var(name: &'static str) -> Option<Regex> {
58    env::var(name).ok().and_then(|e| Regex::new(&e).ok())
59}
60
61#[must_use]
62pub fn new_seed() -> usize {
63    int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
64        let mut rng = rand::thread_rng();
65        let seed = rng.gen::<usize>();
66        info!("Seed: {seed}");
67        seed
68    })
69}
70
/// Settings passed to [`fuzz_language_corpus`].
pub struct FuzzOptions {
    /// Test names to skip, in the `"{language_name} - {test name}"` form that
    /// `fuzz_language_corpus` prints. Taken (left as `None`) by that function,
    /// which panics if an entry matches no test.
    pub skipped: Option<Vec<String>>,
    /// Directory, relative to the grammar directory, that contains
    /// `test/corpus`. Taken (left as `None`) by `fuzz_language_corpus`; `None`
    /// means the grammar directory itself.
    pub subdir: Option<PathBuf>,
    /// Edit-count setting for each fuzz iteration.
    pub edits: usize,
    /// Number of edit/undo trials run per corpus test.
    pub iterations: usize,
    /// When set, only tests whose full name matches are fuzzed.
    pub include: Option<Regex>,
    /// When set and `include` is `None`, tests whose full name matches are
    /// left out.
    pub exclude: Option<Regex>,
    /// When `true`, the input text is logged at info level before the edits,
    /// after the edits, and after the edits are undone.
    pub log_graphs: bool,
    /// Logging flag.
    /// NOTE(review): not read by the code in this file; presumably consumed
    /// elsewhere, confirm.
    pub log: bool,
}
81
82pub fn fuzz_language_corpus(
83    language: &Language,
84    language_name: &str,
85    start_seed: usize,
86    grammar_dir: &Path,
87    options: &mut FuzzOptions,
88) {
89    fn retain(entry: &mut TestEntry, language_name: &str) -> bool {
90        match entry {
91            TestEntry::Example { attributes, .. } => {
92                attributes.languages[0].is_empty()
93                    || attributes
94                        .languages
95                        .iter()
96                        .any(|lang| lang.as_ref() == language_name)
97            }
98            TestEntry::Group {
99                ref mut children, ..
100            } => {
101                children.retain_mut(|child| retain(child, language_name));
102                !children.is_empty()
103            }
104        }
105    }
106
107    let subdir = options.subdir.take().unwrap_or_default();
108
109    let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");
110
111    if !corpus_dir.exists() || !corpus_dir.is_dir() {
112        error!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
113        return;
114    }
115
116    if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
117        error!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
118        return;
119    }
120
121    let mut main_tests = parse_tests(&corpus_dir).unwrap();
122    match main_tests {
123        TestEntry::Group {
124            ref mut children, ..
125        } => {
126            children.retain_mut(|child| retain(child, language_name));
127        }
128        TestEntry::Example { .. } => unreachable!(),
129    }
130    let tests = flatten_tests(
131        main_tests,
132        options.include.as_ref(),
133        options.exclude.as_ref(),
134    );
135
136    let get_test_name = |test: &FlattenedTest| format!("{language_name} - {}", test.name);
137
138    let mut skipped = options
139        .skipped
140        .take()
141        .unwrap_or_default()
142        .into_iter()
143        .chain(tests.iter().filter(|x| x.skip).map(get_test_name))
144        .map(|x| (x, 0))
145        .collect::<HashMap<String, usize>>();
146
147    let mut failure_count = 0;
148
149    let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
150    let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
151
152    if log_seed {
153        info!("  start seed: {start_seed}");
154    }
155
156    println!();
157    for (test_index, test) in tests.iter().enumerate() {
158        let test_name = get_test_name(test);
159        if let Some(counter) = skipped.get_mut(test_name.as_str()) {
160            println!("  {test_index}. {test_name} - SKIPPED");
161            *counter += 1;
162            continue;
163        }
164
165        println!("  {test_index}. {test_name}");
166
167        let passed = allocations::record_checked(|| {
168            let mut log_session = None;
169            let mut parser = get_parser(&mut log_session, "log.html");
170            parser.set_language(language).unwrap();
171            set_included_ranges(&mut parser, &test.input, test.template_delimiters);
172
173            let tree = parser.parse(&test.input, None).unwrap();
174
175            if test.error {
176                return true;
177            }
178
179            let mut actual_output = tree.root_node().to_sexp();
180            if !test.has_fields {
181                actual_output = strip_sexp_fields(&actual_output);
182            }
183
184            if actual_output != test.output {
185                println!("Incorrect initial parse for {test_name}");
186                DiffKey::print();
187                println!("{}", TestDiff::new(&actual_output, &test.output));
188                println!();
189                return false;
190            }
191
192            true
193        })
194        .unwrap_or_else(|e| {
195            error!("{e}");
196            false
197        });
198
199        if !passed {
200            failure_count += 1;
201            continue;
202        }
203
204        let mut parser = Parser::new();
205        parser.set_language(language).unwrap();
206        let tree = parser.parse(&test.input, None).unwrap();
207        drop(parser);
208
209        for trial in 0..options.iterations {
210            let seed = start_seed + trial;
211            let passed = allocations::record_checked(|| {
212                let mut rand = Rand::new(seed);
213                let mut log_session = None;
214                let mut parser = get_parser(&mut log_session, "log.html");
215                parser.set_language(language).unwrap();
216                let mut tree = tree.clone();
217                let mut input = test.input.clone();
218
219                if options.log_graphs {
220                    info!("{}\n", String::from_utf8_lossy(&input));
221                }
222
223                // Perform a random series of edits and reparse.
224                let edit_count = rand.unsigned(*EDIT_COUNT);
225                let mut undo_stack = Vec::with_capacity(edit_count);
226                for _ in 0..=edit_count {
227                    let edit = get_random_edit(&mut rand, &input);
228                    undo_stack.push(invert_edit(&input, &edit));
229                    perform_edit(&mut tree, &mut input, &edit).unwrap();
230                }
231
232                if log_seed {
233                    info!("   {test_index}.{trial:<2} seed: {seed}");
234                }
235
236                if dump_edits {
237                    fs::create_dir_all("fuzz").unwrap();
238                    fs::write(
239                        Path::new("fuzz")
240                            .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
241                        &input,
242                    )
243                    .unwrap();
244                }
245
246                if options.log_graphs {
247                    info!("{}\n", String::from_utf8_lossy(&input));
248                }
249
250                set_included_ranges(&mut parser, &input, test.template_delimiters);
251                let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
252
253                // Check that the new tree is consistent.
254                check_consistent_sizes(&tree2, &input);
255                if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
256                    error!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
257                    return false;
258                }
259
260                // Undo all of the edits and re-parse again.
261                while let Some(edit) = undo_stack.pop() {
262                    perform_edit(&mut tree2, &mut input, &edit).unwrap();
263                }
264                if options.log_graphs {
265                    info!("{}\n", String::from_utf8_lossy(&input));
266                }
267
268                set_included_ranges(&mut parser, &test.input, test.template_delimiters);
269                let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
270
271                // Verify that the final tree matches the expectation from the corpus.
272                let mut actual_output = tree3.root_node().to_sexp();
273                if !test.has_fields {
274                    actual_output = strip_sexp_fields(&actual_output);
275                }
276
277                if actual_output != test.output && !test.error {
278                    println!("Incorrect parse for {test_name} - seed {seed}");
279                    DiffKey::print();
280                    println!("{}", TestDiff::new(&actual_output, &test.output));
281                    println!();
282                    return false;
283                }
284
285                // Check that the edited tree is consistent.
286                check_consistent_sizes(&tree3, &input);
287                if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
288                    error!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
289                    return false;
290                }
291
292                true
293            }).unwrap_or_else(|e| {
294                error!("{e}");
295                false
296            });
297
298            if !passed {
299                failure_count += 1;
300                break;
301            }
302        }
303    }
304
305    if failure_count != 0 {
306        info!("{failure_count} {language_name} corpus tests failed fuzzing");
307    }
308
309    skipped.retain(|_, v| *v == 0);
310
311    if !skipped.is_empty() {
312        info!("Non matchable skip definitions:");
313        for k in skipped.keys() {
314            info!("  {k}");
315        }
316        panic!("Non matchable skip definitions need to be removed");
317    }
318}
319
/// One corpus example after the nested test groups have been flattened by
/// [`flatten_tests`].
pub struct FlattenedTest {
    /// Example name, prefixed with the names of its enclosing non-root groups
    /// joined by `" - "`.
    pub name: String,
    /// Source text handed to the parser.
    pub input: Vec<u8>,
    /// Expected S-expression, compared against the parsed tree's `to_sexp()`.
    pub output: String,
    /// Language list copied from the example's attributes.
    pub languages: Vec<Box<str>>,
    /// Copied from the example's `error` attribute; when `true`, the fuzzer
    /// does not compare the parsed tree against `output`.
    pub error: bool,
    /// Copied from the example's `skip` attribute; when `true`, the fuzzer
    /// skips this test.
    pub skip: bool,
    /// When `false`, field names are stripped from the actual S-expression
    /// before it is compared with `output`.
    pub has_fields: bool,
    /// Delimiter pair forwarded to `set_included_ranges`; `flatten_tests`
    /// always sets this to `None`.
    pub template_delimiters: Option<(&'static str, &'static str)>,
}
330
331#[must_use]
332pub fn flatten_tests(
333    test: TestEntry,
334    include: Option<&Regex>,
335    exclude: Option<&Regex>,
336) -> Vec<FlattenedTest> {
337    fn helper(
338        test: TestEntry,
339        include: Option<&Regex>,
340        exclude: Option<&Regex>,
341        is_root: bool,
342        prefix: &str,
343        result: &mut Vec<FlattenedTest>,
344    ) {
345        match test {
346            TestEntry::Example {
347                mut name,
348                input,
349                output,
350                has_fields,
351                attributes,
352                ..
353            } => {
354                if !prefix.is_empty() {
355                    name.insert_str(0, " - ");
356                    name.insert_str(0, prefix);
357                }
358
359                if let Some(include) = include {
360                    if !include.is_match(&name) {
361                        return;
362                    }
363                } else if let Some(exclude) = exclude {
364                    if exclude.is_match(&name) {
365                        return;
366                    }
367                }
368
369                result.push(FlattenedTest {
370                    name,
371                    input,
372                    output,
373                    has_fields,
374                    languages: attributes.languages,
375                    error: attributes.error,
376                    skip: attributes.skip,
377                    template_delimiters: None,
378                });
379            }
380            TestEntry::Group {
381                mut name, children, ..
382            } => {
383                if !is_root && !prefix.is_empty() {
384                    name.insert_str(0, " - ");
385                    name.insert_str(0, prefix);
386                }
387                for child in children {
388                    helper(child, include, exclude, false, &name, result);
389                }
390            }
391        }
392    }
393    let mut result = Vec::new();
394    helper(test, include, exclude, true, "", &mut result);
395    result
396}