tree_sitter_cli/fuzz/
mod.rs

1use std::{collections::HashMap, env, fs, path::Path, sync::LazyLock};
2
3use rand::Rng;
4use regex::Regex;
5use tree_sitter::{Language, Parser};
6
7pub mod allocations;
8pub mod corpus_test;
9pub mod edits;
10pub mod random;
11pub mod scope_sequence;
12
13use crate::{
14    fuzz::{
15        corpus_test::{
16            check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
17        },
18        edits::{get_random_edit, invert_edit},
19        random::Rand,
20    },
21    parse::perform_edit,
22    test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
23};
24
/// `true` when the `TREE_SITTER_LOG` environment variable is set to a valid Unicode value.
/// Evaluated once, on first access.
pub static LOG_ENABLED: LazyLock<bool> = LazyLock::new(|| env::var("TREE_SITTER_LOG").is_ok());

/// `true` when the `TREE_SITTER_LOG_GRAPHS` environment variable is set to a valid Unicode
/// value. Evaluated once, on first access.
pub static LOG_GRAPH_ENABLED: LazyLock<bool> =
    LazyLock::new(|| env::var("TREE_SITTER_LOG_GRAPHS").is_ok());

/// Contents of the `TREE_SITTER_LANGUAGE` environment variable, or `None` when it cannot be
/// read.
pub static LANGUAGE_FILTER: LazyLock<Option<String>> =
    LazyLock::new(|| env::var("TREE_SITTER_LANGUAGE").ok());

/// Regex compiled from the `TREE_SITTER_EXAMPLE_INCLUDE` environment variable; `None` when the
/// variable is unset or does not hold a valid regex.
pub static EXAMPLE_INCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE"));

/// Regex compiled from the `TREE_SITTER_EXAMPLE_EXCLUDE` environment variable; `None` when the
/// variable is unset or does not hold a valid regex.
pub static EXAMPLE_EXCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE"));

/// Seed produced by [`new_seed`] the first time this static is accessed.
pub static START_SEED: LazyLock<usize> = LazyLock::new(new_seed);

/// Value of the `TREE_SITTER_EDITS` environment variable, defaulting to `3` when it is unset
/// or is not a valid `usize`.
pub static EDIT_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_EDITS").unwrap_or(3));

/// Value of the `TREE_SITTER_ITERATIONS` environment variable, defaulting to `10` when it is
/// unset or is not a valid `usize`.
pub static ITERATION_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10));
46
/// Reads the environment variable `name` and parses it as a `usize`.
///
/// Surrounding whitespace is ignored. Returns `None` when the variable is unset, is not valid
/// Unicode, or does not hold a valid `usize`; in the last case a warning is written to stderr
/// so that a mistyped value is not silently replaced by a default.
fn int_env_var(name: &'static str) -> Option<usize> {
    let raw = env::var(name).ok()?;
    match raw.trim().parse::<usize>() {
        Ok(value) => Some(value),
        Err(err) => {
            eprintln!("Warning: ignoring {name}={raw:?}: {err}");
            None
        }
    }
}
50
51fn regex_env_var(name: &'static str) -> Option<Regex> {
52    env::var(name).ok().and_then(|e| Regex::new(&e).ok())
53}
54
55#[must_use]
56pub fn new_seed() -> usize {
57    int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
58        let mut rng = rand::thread_rng();
59        let seed = rng.gen::<usize>();
60        eprintln!("Seed: {seed}");
61        seed
62    })
63}
64
/// Configuration consumed by [`fuzz_language_corpus`].
pub struct FuzzOptions {
    /// Names of tests to skip, written as `<language name> - <test name>`.
    /// `fuzz_language_corpus` takes this value (leaving `None`) and panics at the end of the
    /// run if any entry never matched a test.
    pub skipped: Option<Vec<String>>,
    /// Optional sub-directory of the grammar directory that contains `test/corpus`.
    /// `fuzz_language_corpus` takes this value (leaving `None`).
    pub subdir: Option<String>,
    /// Edit budget configured by the caller for each fuzzing iteration.
    /// NOTE(review): confirm where the fuzz loop consumes this value.
    pub edits: usize,
    /// Number of edit/undo trials run for each corpus test.
    pub iterations: usize,
    /// Passed to [`flatten_tests`] as its `include` filter.
    pub include: Option<Regex>,
    /// Passed to [`flatten_tests`] as its `exclude` filter.
    pub exclude: Option<Regex>,
    /// When `true`, the input bytes are echoed to stderr before and after the edits of each
    /// trial.
    pub log_graphs: bool,
    /// Not read anywhere in this file's fuzz loop.
    /// NOTE(review): presumably enables parser logging elsewhere; confirm.
    pub log: bool,
}
75
76pub fn fuzz_language_corpus(
77    language: &Language,
78    language_name: &str,
79    start_seed: usize,
80    grammar_dir: &Path,
81    options: &mut FuzzOptions,
82) {
83    fn retain(entry: &mut TestEntry, language_name: &str) -> bool {
84        match entry {
85            TestEntry::Example { attributes, .. } => {
86                attributes.languages[0].is_empty()
87                    || attributes
88                        .languages
89                        .iter()
90                        .any(|lang| lang.as_ref() == language_name)
91            }
92            TestEntry::Group {
93                ref mut children, ..
94            } => {
95                children.retain_mut(|child| retain(child, language_name));
96                !children.is_empty()
97            }
98        }
99    }
100
101    let subdir = options.subdir.take().unwrap_or_default();
102
103    let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");
104
105    if !corpus_dir.exists() || !corpus_dir.is_dir() {
106        eprintln!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
107        return;
108    }
109
110    if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
111        eprintln!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
112        return;
113    }
114
115    let mut main_tests = parse_tests(&corpus_dir).unwrap();
116    match main_tests {
117        TestEntry::Group {
118            ref mut children, ..
119        } => {
120            children.retain_mut(|child| retain(child, language_name));
121        }
122        TestEntry::Example { .. } => unreachable!(),
123    }
124    let tests = flatten_tests(
125        main_tests,
126        options.include.as_ref(),
127        options.exclude.as_ref(),
128    );
129
130    let get_test_name = |test: &FlattenedTest| format!("{language_name} - {}", test.name);
131
132    let mut skipped = options
133        .skipped
134        .take()
135        .unwrap_or_default()
136        .into_iter()
137        .chain(tests.iter().filter(|x| x.skip).map(get_test_name))
138        .map(|x| (x, 0))
139        .collect::<HashMap<String, usize>>();
140
141    let mut failure_count = 0;
142
143    let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
144    let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
145
146    if log_seed {
147        println!("  start seed: {start_seed}");
148    }
149
150    println!();
151    for (test_index, test) in tests.iter().enumerate() {
152        let test_name = get_test_name(test);
153        if let Some(counter) = skipped.get_mut(test_name.as_str()) {
154            println!("  {test_index}. {test_name} - SKIPPED");
155            *counter += 1;
156            continue;
157        }
158
159        println!("  {test_index}. {test_name}");
160
161        let passed = allocations::record(|| {
162            let mut log_session = None;
163            let mut parser = get_parser(&mut log_session, "log.html");
164            parser.set_language(language).unwrap();
165            set_included_ranges(&mut parser, &test.input, test.template_delimiters);
166
167            let tree = parser.parse(&test.input, None).unwrap();
168
169            if test.error {
170                return true;
171            }
172
173            let mut actual_output = tree.root_node().to_sexp();
174            if !test.has_fields {
175                actual_output = strip_sexp_fields(&actual_output);
176            }
177
178            if actual_output != test.output {
179                println!("Incorrect initial parse for {test_name}");
180                print_diff_key();
181                print_diff(&actual_output, &test.output, true);
182                println!();
183                return false;
184            }
185
186            true
187        })
188        .unwrap_or_else(|e| {
189            eprintln!("Error: {e}");
190            false
191        });
192
193        if !passed {
194            failure_count += 1;
195            continue;
196        }
197
198        let mut parser = Parser::new();
199        parser.set_language(language).unwrap();
200        let tree = parser.parse(&test.input, None).unwrap();
201        drop(parser);
202
203        for trial in 0..options.iterations {
204            let seed = start_seed + trial;
205            let passed = allocations::record(|| {
206                let mut rand = Rand::new(seed);
207                let mut log_session = None;
208                let mut parser = get_parser(&mut log_session, "log.html");
209                parser.set_language(language).unwrap();
210                let mut tree = tree.clone();
211                let mut input = test.input.clone();
212
213                if options.log_graphs {
214                    eprintln!("{}\n", String::from_utf8_lossy(&input));
215                }
216
217                // Perform a random series of edits and reparse.
218                let edit_count = rand.unsigned(*EDIT_COUNT);
219                let mut undo_stack = Vec::with_capacity(edit_count);
220                for _ in 0..=edit_count {
221                    let edit = get_random_edit(&mut rand, &input);
222                    undo_stack.push(invert_edit(&input, &edit));
223                    perform_edit(&mut tree, &mut input, &edit).unwrap();
224                }
225
226                if log_seed {
227                    println!("   {test_index}.{trial:<2} seed: {seed}");
228                }
229
230                if dump_edits {
231                    fs::create_dir_all("fuzz").unwrap();
232                    fs::write(
233                        Path::new("fuzz")
234                            .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
235                        &input,
236                    )
237                    .unwrap();
238                }
239
240                if options.log_graphs {
241                    eprintln!("{}\n", String::from_utf8_lossy(&input));
242                }
243
244                set_included_ranges(&mut parser, &input, test.template_delimiters);
245                let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
246
247                // Check that the new tree is consistent.
248                check_consistent_sizes(&tree2, &input);
249                if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
250                    println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
251                    return false;
252                }
253
254                // Undo all of the edits and re-parse again.
255                while let Some(edit) = undo_stack.pop() {
256                    perform_edit(&mut tree2, &mut input, &edit).unwrap();
257                }
258                if options.log_graphs {
259                    eprintln!("{}\n", String::from_utf8_lossy(&input));
260                }
261
262                set_included_ranges(&mut parser, &test.input, test.template_delimiters);
263                let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
264
265                // Verify that the final tree matches the expectation from the corpus.
266                let mut actual_output = tree3.root_node().to_sexp();
267                if !test.has_fields {
268                    actual_output = strip_sexp_fields(&actual_output);
269                }
270
271                if actual_output != test.output && !test.error {
272                    println!("Incorrect parse for {test_name} - seed {seed}");
273                    print_diff_key();
274                    print_diff(&actual_output, &test.output, true);
275                    println!();
276                    return false;
277                }
278
279                // Check that the edited tree is consistent.
280                check_consistent_sizes(&tree3, &input);
281                if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
282                    println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
283                    return false;
284                }
285
286                true
287            }).unwrap_or_else(|e| {
288                eprintln!("Error: {e}");
289                false
290            });
291
292            if !passed {
293                failure_count += 1;
294                break;
295            }
296        }
297    }
298
299    if failure_count != 0 {
300        eprintln!("{failure_count} {language_name} corpus tests failed fuzzing");
301    }
302
303    skipped.retain(|_, v| *v == 0);
304
305    if !skipped.is_empty() {
306        println!("Non matchable skip definitions:");
307        for k in skipped.keys() {
308            println!("  {k}");
309        }
310        panic!("Non matchable skip definitions needs to be removed");
311    }
312}
313
/// A single corpus example produced by [`flatten_tests`], with its group hierarchy folded
/// into the name.
pub struct FlattenedTest {
    /// Example name, prefixed with the names of its enclosing groups joined by `" - "`.
    pub name: String,
    /// Raw source bytes handed to the parser.
    pub input: Vec<u8>,
    /// Expected S-expression that the parsed tree's `to_sexp()` output is compared against.
    pub output: String,
    /// Languages listed in the example's attributes.
    pub languages: Vec<Box<str>>,
    /// When `true`, a mismatch between actual and expected output is not treated as a failure.
    pub error: bool,
    /// When `true`, the fuzzer adds this test to its skip list.
    pub skip: bool,
    /// When `false`, field names are stripped from the actual output before comparison.
    pub has_fields: bool,
    /// Forwarded to `set_included_ranges`; `flatten_tests` always sets this to `None`.
    pub template_delimiters: Option<(&'static str, &'static str)>,
}
324
325#[must_use]
326pub fn flatten_tests(
327    test: TestEntry,
328    include: Option<&Regex>,
329    exclude: Option<&Regex>,
330) -> Vec<FlattenedTest> {
331    fn helper(
332        test: TestEntry,
333        include: Option<&Regex>,
334        exclude: Option<&Regex>,
335        is_root: bool,
336        prefix: &str,
337        result: &mut Vec<FlattenedTest>,
338    ) {
339        match test {
340            TestEntry::Example {
341                mut name,
342                input,
343                output,
344                has_fields,
345                attributes,
346                ..
347            } => {
348                if !prefix.is_empty() {
349                    name.insert_str(0, " - ");
350                    name.insert_str(0, prefix);
351                }
352
353                if let Some(include) = include {
354                    if !include.is_match(&name) {
355                        return;
356                    }
357                } else if let Some(exclude) = exclude {
358                    if exclude.is_match(&name) {
359                        return;
360                    }
361                }
362
363                result.push(FlattenedTest {
364                    name,
365                    input,
366                    output,
367                    has_fields,
368                    languages: attributes.languages,
369                    error: attributes.error,
370                    skip: attributes.skip,
371                    template_delimiters: None,
372                });
373            }
374            TestEntry::Group {
375                mut name, children, ..
376            } => {
377                if !is_root && !prefix.is_empty() {
378                    name.insert_str(0, " - ");
379                    name.insert_str(0, prefix);
380                }
381                for child in children {
382                    helper(child, include, exclude, false, &name, result);
383                }
384            }
385        }
386    }
387    let mut result = Vec::new();
388    helper(test, include, exclude, true, "", &mut result);
389    result
390}