tree_sitter_cli/fuzz/
mod.rs

1use std::{collections::HashMap, env, fs, path::Path, sync::LazyLock};
2
3use rand::Rng;
4use regex::Regex;
5use tree_sitter::{Language, Parser};
6
7pub mod allocations;
8pub mod corpus_test;
9pub mod edits;
10pub mod random;
11pub mod scope_sequence;
12
13use crate::{
14    fuzz::{
15        corpus_test::{
16            check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
17        },
18        edits::{get_random_edit, invert_edit},
19        random::Rand,
20    },
21    parse::perform_edit,
22    test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
23};
24
/// `true` when the `TREE_SITTER_LOG` environment variable is set to a valid Unicode value.
pub static LOG_ENABLED: LazyLock<bool> = LazyLock::new(|| env::var("TREE_SITTER_LOG").is_ok());

/// `true` when the `TREE_SITTER_LOG_GRAPHS` environment variable is set to a valid Unicode value.
pub static LOG_GRAPH_ENABLED: LazyLock<bool> =
    LazyLock::new(|| env::var("TREE_SITTER_LOG_GRAPHS").is_ok());

/// Value of the `TREE_SITTER_LANGUAGE` environment variable, if it is set.
pub static LANGUAGE_FILTER: LazyLock<Option<String>> =
    LazyLock::new(|| env::var("TREE_SITTER_LANGUAGE").ok());

/// Regex compiled from `TREE_SITTER_EXAMPLE_INCLUDE`; `None` when the variable is unset or
/// does not compile as a regex.
pub static EXAMPLE_INCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE"));

/// Regex compiled from `TREE_SITTER_EXAMPLE_EXCLUDE`; `None` when the variable is unset or
/// does not compile as a regex.
pub static EXAMPLE_EXCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE"));

/// Seed produced by [`new_seed`] on first access and reused for the rest of the process.
pub static START_SEED: LazyLock<usize> = LazyLock::new(new_seed);

/// Edit count read from `TREE_SITTER_EDITS`; falls back to 3 when the variable is unset or
/// is not a valid `usize`.
pub static EDIT_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_EDITS").unwrap_or(3));

/// Iteration count read from `TREE_SITTER_ITERATIONS`; falls back to 10 when the variable is
/// unset or is not a valid `usize`.
pub static ITERATION_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10));
46
/// Reads the environment variable `name` and parses it as a `usize`.
///
/// Returns `None` when the variable is unset, is not valid Unicode, or does not parse.
fn int_env_var(name: &'static str) -> Option<usize> {
    let raw = env::var(name).ok()?;
    raw.parse().ok()
}
50
51fn regex_env_var(name: &'static str) -> Option<Regex> {
52    env::var(name).ok().and_then(|e| Regex::new(&e).ok())
53}
54
55#[must_use]
56pub fn new_seed() -> usize {
57    int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
58        let mut rng = rand::thread_rng();
59        rng.gen::<usize>()
60    })
61}
62
/// Settings consumed by [`fuzz_language_corpus`].
pub struct FuzzOptions {
    /// Names of tests to skip, in the `"{language_name} - {test name}"` form that
    /// `fuzz_language_corpus` builds. The list is taken (left as `None`) by that function,
    /// which panics if an entry never matches a test.
    pub skipped: Option<Vec<String>>,
    /// Optional sub-directory of the grammar directory that contains `test/corpus`.
    /// Taken (left as `None`) by `fuzz_language_corpus`.
    pub subdir: Option<String>,
    /// Edit-count setting for each fuzz iteration.
    pub edits: usize,
    /// Number of fuzz trials run per corpus test.
    pub iterations: usize,
    /// Regex forwarded to [`flatten_tests`] as the include filter.
    pub include: Option<Regex>,
    /// Regex forwarded to [`flatten_tests`] as the exclude filter.
    pub exclude: Option<Regex>,
    /// When `true`, `fuzz_language_corpus` prints the input text to stderr before the
    /// edits, after the edits, and after the edits are undone.
    pub log_graphs: bool,
    /// NOTE(review): not read anywhere in this file's visible code; presumably toggles
    /// parse logging elsewhere — confirm against callers.
    pub log: bool,
}
73
74pub fn fuzz_language_corpus(
75    language: &Language,
76    language_name: &str,
77    start_seed: usize,
78    grammar_dir: &Path,
79    options: &mut FuzzOptions,
80) {
81    fn retain(entry: &mut TestEntry, language_name: &str) -> bool {
82        match entry {
83            TestEntry::Example { attributes, .. } => {
84                attributes.languages[0].is_empty()
85                    || attributes
86                        .languages
87                        .iter()
88                        .any(|lang| lang.as_ref() == language_name)
89            }
90            TestEntry::Group {
91                ref mut children, ..
92            } => {
93                children.retain_mut(|child| retain(child, language_name));
94                !children.is_empty()
95            }
96        }
97    }
98
99    let subdir = options.subdir.take().unwrap_or_default();
100
101    let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");
102
103    if !corpus_dir.exists() || !corpus_dir.is_dir() {
104        eprintln!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
105        return;
106    }
107
108    if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
109        eprintln!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
110        return;
111    }
112
113    let mut main_tests = parse_tests(&corpus_dir).unwrap();
114    match main_tests {
115        TestEntry::Group {
116            ref mut children, ..
117        } => {
118            children.retain_mut(|child| retain(child, language_name));
119        }
120        TestEntry::Example { .. } => unreachable!(),
121    }
122    let tests = flatten_tests(
123        main_tests,
124        options.include.as_ref(),
125        options.exclude.as_ref(),
126    );
127
128    let get_test_name = |test: &FlattenedTest| format!("{language_name} - {}", test.name);
129
130    let mut skipped = options
131        .skipped
132        .take()
133        .unwrap_or_default()
134        .into_iter()
135        .chain(tests.iter().filter(|x| x.skip).map(get_test_name))
136        .map(|x| (x, 0))
137        .collect::<HashMap<String, usize>>();
138
139    let mut failure_count = 0;
140
141    let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
142    let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
143
144    if log_seed {
145        println!("  start seed: {start_seed}");
146    }
147
148    println!();
149    for (test_index, test) in tests.iter().enumerate() {
150        let test_name = get_test_name(test);
151        if let Some(counter) = skipped.get_mut(test_name.as_str()) {
152            println!("  {test_index}. {test_name} - SKIPPED");
153            *counter += 1;
154            continue;
155        }
156
157        println!("  {test_index}. {test_name}");
158
159        let passed = allocations::record(|| {
160            let mut log_session = None;
161            let mut parser = get_parser(&mut log_session, "log.html");
162            parser.set_language(language).unwrap();
163            set_included_ranges(&mut parser, &test.input, test.template_delimiters);
164
165            let tree = parser.parse(&test.input, None).unwrap();
166
167            if test.error {
168                return true;
169            }
170
171            let mut actual_output = tree.root_node().to_sexp();
172            if !test.has_fields {
173                actual_output = strip_sexp_fields(&actual_output);
174            }
175
176            if actual_output != test.output {
177                println!("Incorrect initial parse for {test_name}");
178                print_diff_key();
179                print_diff(&actual_output, &test.output, true);
180                println!();
181                return false;
182            }
183
184            true
185        })
186        .unwrap_or_else(|e| {
187            eprintln!("Error: {e}");
188            false
189        });
190
191        if !passed {
192            failure_count += 1;
193            continue;
194        }
195
196        let mut parser = Parser::new();
197        parser.set_language(language).unwrap();
198        let tree = parser.parse(&test.input, None).unwrap();
199        drop(parser);
200
201        for trial in 0..options.iterations {
202            let seed = start_seed + trial;
203            let passed = allocations::record(|| {
204                let mut rand = Rand::new(seed);
205                let mut log_session = None;
206                let mut parser = get_parser(&mut log_session, "log.html");
207                parser.set_language(language).unwrap();
208                let mut tree = tree.clone();
209                let mut input = test.input.clone();
210
211                if options.log_graphs {
212                    eprintln!("{}\n", String::from_utf8_lossy(&input));
213                }
214
215                // Perform a random series of edits and reparse.
216                let mut undo_stack = Vec::new();
217                for _ in 0..=rand.unsigned(*EDIT_COUNT) {
218                    let edit = get_random_edit(&mut rand, &input);
219                    undo_stack.push(invert_edit(&input, &edit));
220                    perform_edit(&mut tree, &mut input, &edit).unwrap();
221                }
222
223                if log_seed {
224                    println!("   {test_index}.{trial:<2} seed: {seed}");
225                }
226
227                if dump_edits {
228                    fs::create_dir_all("fuzz").unwrap();
229                    fs::write(
230                        Path::new("fuzz")
231                            .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
232                        &input,
233                    )
234                    .unwrap();
235                }
236
237                if options.log_graphs {
238                    eprintln!("{}\n", String::from_utf8_lossy(&input));
239                }
240
241                set_included_ranges(&mut parser, &input, test.template_delimiters);
242                let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
243
244                // Check that the new tree is consistent.
245                check_consistent_sizes(&tree2, &input);
246                if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
247                    println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
248                    return false;
249                }
250
251                // Undo all of the edits and re-parse again.
252                while let Some(edit) = undo_stack.pop() {
253                    perform_edit(&mut tree2, &mut input, &edit).unwrap();
254                }
255                if options.log_graphs {
256                    eprintln!("{}\n", String::from_utf8_lossy(&input));
257                }
258
259                set_included_ranges(&mut parser, &test.input, test.template_delimiters);
260                let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
261
262                // Verify that the final tree matches the expectation from the corpus.
263                let mut actual_output = tree3.root_node().to_sexp();
264                if !test.has_fields {
265                    actual_output = strip_sexp_fields(&actual_output);
266                }
267
268                if actual_output != test.output && !test.error {
269                    println!("Incorrect parse for {test_name} - seed {seed}");
270                    print_diff_key();
271                    print_diff(&actual_output, &test.output, true);
272                    println!();
273                    return false;
274                }
275
276                // Check that the edited tree is consistent.
277                check_consistent_sizes(&tree3, &input);
278                if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
279                    println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
280                    return false;
281                }
282
283                true
284            }).unwrap_or_else(|e| {
285                eprintln!("Error: {e}");
286                false
287            });
288
289            if !passed {
290                failure_count += 1;
291                break;
292            }
293        }
294    }
295
296    if failure_count != 0 {
297        eprintln!("{failure_count} {language_name} corpus tests failed fuzzing");
298    }
299
300    skipped.retain(|_, v| *v == 0);
301
302    if !skipped.is_empty() {
303        println!("Non matchable skip definitions:");
304        for k in skipped.keys() {
305            println!("  {k}");
306        }
307        panic!("Non matchable skip definitions needs to be removed");
308    }
309}
310
/// A single corpus example produced by [`flatten_tests`], with group nesting folded into
/// its name.
pub struct FlattenedTest {
    /// Example name, prefixed with the names of its enclosing non-root groups joined by
    /// `" - "`.
    pub name: String,
    /// Source bytes to parse.
    pub input: Vec<u8>,
    /// Expected output, compared against the parsed tree's S-expression.
    pub output: String,
    /// Languages taken from the example's attributes.
    pub languages: Vec<Box<str>>,
    /// Taken from the example's `error` attribute; when set, `fuzz_language_corpus` does
    /// not compare the parse output against `output`.
    pub error: bool,
    /// Taken from the example's `skip` attribute; when set, `fuzz_language_corpus` skips
    /// the test.
    pub skip: bool,
    /// When `false`, field names are stripped from the actual output before it is compared
    /// with `output`.
    pub has_fields: bool,
    /// Delimiter pair forwarded to `set_included_ranges`; `flatten_tests` always sets this
    /// to `None`.
    pub template_delimiters: Option<(&'static str, &'static str)>,
}
321
322#[must_use]
323pub fn flatten_tests(
324    test: TestEntry,
325    include: Option<&Regex>,
326    exclude: Option<&Regex>,
327) -> Vec<FlattenedTest> {
328    fn helper(
329        test: TestEntry,
330        include: Option<&Regex>,
331        exclude: Option<&Regex>,
332        is_root: bool,
333        prefix: &str,
334        result: &mut Vec<FlattenedTest>,
335    ) {
336        match test {
337            TestEntry::Example {
338                mut name,
339                input,
340                output,
341                has_fields,
342                attributes,
343                ..
344            } => {
345                if !prefix.is_empty() {
346                    name.insert_str(0, " - ");
347                    name.insert_str(0, prefix);
348                }
349
350                if let Some(include) = include {
351                    if !include.is_match(&name) {
352                        return;
353                    }
354                } else if let Some(exclude) = exclude {
355                    if exclude.is_match(&name) {
356                        return;
357                    }
358                }
359
360                result.push(FlattenedTest {
361                    name,
362                    input,
363                    output,
364                    has_fields,
365                    languages: attributes.languages,
366                    error: attributes.error,
367                    skip: attributes.skip,
368                    template_delimiters: None,
369                });
370            }
371            TestEntry::Group {
372                mut name, children, ..
373            } => {
374                if !is_root && !prefix.is_empty() {
375                    name.insert_str(0, " - ");
376                    name.insert_str(0, prefix);
377                }
378                for child in children {
379                    helper(child, include, exclude, false, &name, result);
380                }
381            }
382        }
383    }
384    let mut result = Vec::new();
385    helper(test, include, exclude, true, "", &mut result);
386    result
387}