Skip to main content

tree_sitter_cli/
fuzz.rs

1use std::{
2    collections::HashMap,
3    env, fs,
4    path::{Path, PathBuf},
5    sync::LazyLock,
6};
7
8use log::{error, info};
9use rand::Rng;
10use regex::Regex;
11use tree_sitter::{Language, Parser};
12
13pub mod allocations;
14pub mod corpus_test;
15pub mod edits;
16pub mod random;
17pub mod scope_sequence;
18
19use crate::{
20    fuzz::{
21        corpus_test::{
22            check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
23        },
24        edits::{get_random_edit, invert_edit},
25        random::Rand,
26    },
27    parse::perform_edit,
28    test::{parse_tests, strip_sexp_fields, DiffKey, TestDiff, TestEntry},
29};
30
31pub static LOG_ENABLED: LazyLock<bool> = LazyLock::new(|| env::var("TREE_SITTER_LOG").is_ok());
32
33pub static LOG_GRAPH_ENABLED: LazyLock<bool> =
34    LazyLock::new(|| env::var("TREE_SITTER_LOG_GRAPHS").is_ok());
35
36pub static LANGUAGE_FILTER: LazyLock<Option<String>> =
37    LazyLock::new(|| env::var("TREE_SITTER_LANGUAGE").ok());
38
39pub static EXAMPLE_INCLUDE: LazyLock<Option<Regex>> =
40    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE"));
41
42pub static EXAMPLE_EXCLUDE: LazyLock<Option<Regex>> =
43    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE"));
44
45pub static START_SEED: LazyLock<usize> = LazyLock::new(new_seed);
46
47pub const DEFAULT_EDIT_COUNT: usize = 3;
48pub static EDIT_COUNT: LazyLock<usize> =
49    LazyLock::new(|| int_env_var("TREE_SITTER_EDITS").unwrap_or(DEFAULT_EDIT_COUNT));
50
51pub const DEFAULT_ITERATION_COUNT: usize = 10;
52pub static ITERATION_COUNT: LazyLock<usize> =
53    LazyLock::new(|| int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(DEFAULT_ITERATION_COUNT));
54
/// Reads the environment variable `name` and parses it as a `usize`.
///
/// Returns `None` when the variable cannot be read (unset or not valid Unicode) or when its
/// value does not parse as a `usize`; no whitespace trimming is performed.
fn int_env_var(name: &'static str) -> Option<usize> {
    let raw = env::var(name).ok()?;
    raw.parse::<usize>().ok()
}
58
59fn regex_env_var(name: &'static str) -> Option<Regex> {
60    env::var(name).ok().and_then(|e| Regex::new(&e).ok())
61}
62
63#[must_use]
64pub fn new_seed() -> usize {
65    int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
66        let mut rng = rand::thread_rng();
67        let seed = rng.gen::<usize>();
68        info!("Seed: {seed}");
69        seed
70    })
71}
72
73pub struct FuzzOptions {
74    pub skipped: Option<Vec<String>>,
75    pub subdir: Option<PathBuf>,
76    pub edits: usize,
77    pub iterations: usize,
78    pub include: Option<Regex>,
79    pub exclude: Option<Regex>,
80    pub log_graphs: bool,
81    pub log: bool,
82}
83
84pub fn fuzz_language_corpus(
85    language: &Language,
86    language_name: &str,
87    start_seed: usize,
88    grammar_dir: &Path,
89    options: &mut FuzzOptions,
90) {
91    fn retain(entry: &mut TestEntry, language_name: &str) -> bool {
92        match entry {
93            TestEntry::Example { attributes, .. } => {
94                attributes.languages[0].is_empty()
95                    || attributes
96                        .languages
97                        .iter()
98                        .any(|lang| lang.as_ref() == language_name)
99            }
100            TestEntry::Group {
101                ref mut children, ..
102            } => {
103                children.retain_mut(|child| retain(child, language_name));
104                !children.is_empty()
105            }
106        }
107    }
108
109    let subdir = options.subdir.take().unwrap_or_default();
110
111    let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");
112
113    if !corpus_dir.exists() || !corpus_dir.is_dir() {
114        error!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
115        return;
116    }
117
118    if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
119        error!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
120        return;
121    }
122
123    let mut main_tests = parse_tests(&corpus_dir).unwrap();
124    match main_tests {
125        TestEntry::Group {
126            ref mut children, ..
127        } => {
128            children.retain_mut(|child| retain(child, language_name));
129        }
130        TestEntry::Example { .. } => unreachable!(),
131    }
132    let tests = flatten_tests(
133        main_tests,
134        options.include.as_ref(),
135        options.exclude.as_ref(),
136    );
137
138    let get_test_name = |test: &FlattenedTest| format!("{language_name} - {}", test.name);
139
140    let mut skipped = options
141        .skipped
142        .take()
143        .unwrap_or_default()
144        .into_iter()
145        .chain(tests.iter().filter(|x| x.skip).map(get_test_name))
146        .map(|x| (x, 0))
147        .collect::<HashMap<String, usize>>();
148
149    let mut failure_count = 0;
150
151    let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
152    let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
153
154    if log_seed {
155        info!("  start seed: {start_seed}");
156    }
157
158    println!();
159    for (test_index, test) in tests.iter().enumerate() {
160        let test_name = get_test_name(test);
161        if let Some(counter) = skipped.get_mut(test_name.as_str()) {
162            println!("  {test_index}. {test_name} - SKIPPED");
163            *counter += 1;
164            continue;
165        }
166
167        println!("  {test_index}. {test_name}");
168
169        let passed = allocations::record_checked(|| {
170            let mut log_session = None;
171            let mut parser = get_parser(&mut log_session, "log.html");
172            parser.set_language(language).unwrap();
173            set_included_ranges(&mut parser, &test.input, test.template_delimiters);
174
175            let tree = parser.parse(&test.input, None).unwrap();
176
177            if test.error {
178                return true;
179            }
180
181            let mut actual_output = tree.root_node().to_sexp();
182            if !test.has_fields {
183                actual_output = strip_sexp_fields(&actual_output);
184            }
185
186            if actual_output != test.output {
187                println!("Incorrect initial parse for {test_name}");
188                DiffKey::print();
189                println!("{}", TestDiff::new(&actual_output, &test.output));
190                println!();
191                return false;
192            }
193
194            true
195        })
196        .unwrap_or_else(|e| {
197            error!("{e}");
198            false
199        });
200
201        if !passed {
202            failure_count += 1;
203            continue;
204        }
205
206        let mut parser = Parser::new();
207        parser.set_language(language).unwrap();
208        let tree = parser.parse(&test.input, None).unwrap();
209        drop(parser);
210
211        for trial in 0..options.iterations {
212            let seed = start_seed + trial;
213            let passed = allocations::record_checked(|| {
214                let mut rand = Rand::new(seed);
215                let mut log_session = None;
216                let mut parser = get_parser(&mut log_session, "log.html");
217                parser.set_language(language).unwrap();
218                let mut tree = tree.clone();
219                let mut input = test.input.clone();
220
221                if options.log_graphs {
222                    info!("{}\n", String::from_utf8_lossy(&input));
223                }
224
225                // Perform a random series of edits and reparse.
226                let edit_count = rand.unsigned(options.edits);
227                let mut undo_stack = Vec::with_capacity(edit_count);
228                for _ in 0..=edit_count {
229                    let edit = get_random_edit(&mut rand, &input);
230                    undo_stack.push(invert_edit(&input, &edit));
231                    perform_edit(&mut tree, &mut input, &edit).unwrap();
232                }
233
234                if log_seed {
235                    info!("   {test_index}.{trial:<2} seed: {seed}");
236                }
237
238                if dump_edits {
239                    fs::create_dir_all("fuzz").unwrap();
240                    fs::write(
241                        Path::new("fuzz")
242                            .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
243                        &input,
244                    )
245                    .unwrap();
246                }
247
248                if options.log_graphs {
249                    info!("{}\n", String::from_utf8_lossy(&input));
250                }
251
252                set_included_ranges(&mut parser, &input, test.template_delimiters);
253                let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
254
255                // Check that the new tree is consistent.
256                check_consistent_sizes(&tree2, &input);
257                if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
258                    error!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
259                    return false;
260                }
261
262                // Undo all of the edits and re-parse again.
263                while let Some(edit) = undo_stack.pop() {
264                    perform_edit(&mut tree2, &mut input, &edit).unwrap();
265                }
266                if options.log_graphs {
267                    info!("{}\n", String::from_utf8_lossy(&input));
268                }
269
270                set_included_ranges(&mut parser, &test.input, test.template_delimiters);
271                let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
272
273                // Verify that the final tree matches the expectation from the corpus.
274                let mut actual_output = tree3.root_node().to_sexp();
275                if !test.has_fields {
276                    actual_output = strip_sexp_fields(&actual_output);
277                }
278
279                if actual_output != test.output && !test.error {
280                    println!("Incorrect parse for {test_name} - seed {seed}");
281                    DiffKey::print();
282                    println!("{}", TestDiff::new(&actual_output, &test.output));
283                    println!();
284                    return false;
285                }
286
287                // Check that the edited tree is consistent.
288                check_consistent_sizes(&tree3, &input);
289                if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
290                    error!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
291                    return false;
292                }
293
294                true
295            }).unwrap_or_else(|e| {
296                error!("{e}");
297                false
298            });
299
300            if !passed {
301                failure_count += 1;
302                break;
303            }
304        }
305    }
306
307    if failure_count != 0 {
308        info!("{failure_count} {language_name} corpus tests failed fuzzing");
309    }
310
311    skipped.retain(|_, v| *v == 0);
312
313    if !skipped.is_empty() {
314        info!("Non matchable skip definitions:");
315        for k in skipped.keys() {
316            info!("  {k}");
317        }
318        panic!("Non matchable skip definitions need to be removed");
319    }
320}
321
/// A single corpus example after the group hierarchy has been flattened away.
#[derive(Debug, Clone)]
pub struct FlattenedTest {
    /// Example name, prefixed with the names of its enclosing groups joined by `" - "`.
    pub name: String,
    /// Raw source bytes to parse.
    pub input: Vec<u8>,
    /// Expected S-expression of the parsed tree.
    pub output: String,
    /// Languages copied from the example's attributes.
    pub languages: Vec<Box<str>>,
    /// Copied from the example's `error` attribute; such tests are parsed but their output
    /// is not compared.
    pub error: bool,
    /// Copied from the example's `skip` attribute.
    pub skip: bool,
    /// Whether the expected output contains field names; when `false`, field names are
    /// stripped from the actual output before comparison.
    pub has_fields: bool,
    /// Optional pair of delimiter strings forwarded to `set_included_ranges`.
    pub template_delimiters: Option<(&'static str, &'static str)>,
}
332
333#[must_use]
334pub fn flatten_tests(
335    test: TestEntry,
336    include: Option<&Regex>,
337    exclude: Option<&Regex>,
338) -> Vec<FlattenedTest> {
339    fn helper(
340        test: TestEntry,
341        include: Option<&Regex>,
342        exclude: Option<&Regex>,
343        is_root: bool,
344        prefix: &str,
345        result: &mut Vec<FlattenedTest>,
346    ) {
347        match test {
348            TestEntry::Example {
349                mut name,
350                input,
351                output,
352                has_fields,
353                attributes,
354                ..
355            } => {
356                if !prefix.is_empty() {
357                    name.insert_str(0, " - ");
358                    name.insert_str(0, prefix);
359                }
360
361                if let Some(include) = include {
362                    if !include.is_match(&name) {
363                        return;
364                    }
365                } else if let Some(exclude) = exclude {
366                    if exclude.is_match(&name) {
367                        return;
368                    }
369                }
370
371                result.push(FlattenedTest {
372                    name,
373                    input,
374                    output,
375                    has_fields,
376                    languages: attributes.languages,
377                    error: attributes.error,
378                    skip: attributes.skip,
379                    template_delimiters: None,
380                });
381            }
382            TestEntry::Group {
383                mut name, children, ..
384            } => {
385                if !is_root && !prefix.is_empty() {
386                    name.insert_str(0, " - ");
387                    name.insert_str(0, prefix);
388                }
389                for child in children {
390                    helper(child, include, exclude, false, &name, result);
391                }
392            }
393        }
394    }
395    let mut result = Vec::new();
396    helper(test, include, exclude, true, "", &mut result);
397    result
398}