tree-sitter-cli 0.26.8

CLI tool for developing, testing, and using Tree-sitter parsers
Documentation
use std::{
    collections::HashMap,
    env, fs,
    path::{Path, PathBuf},
    sync::LazyLock,
};

use log::{error, info};
use rand::Rng;
use regex::Regex;
use tree_sitter::{Language, Parser};

pub mod allocations;
pub mod corpus_test;
pub mod edits;
pub mod random;
pub mod scope_sequence;

use crate::{
    fuzz::{
        corpus_test::{
            check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
        },
        edits::{get_random_edit, invert_edit},
        random::Rand,
    },
    parse::perform_edit,
    test::{parse_tests, strip_sexp_fields, DiffKey, TestDiff, TestEntry},
};

pub static LOG_ENABLED: LazyLock<bool> = LazyLock::new(|| env::var("TREE_SITTER_LOG").is_ok());

pub static LOG_GRAPH_ENABLED: LazyLock<bool> =
    LazyLock::new(|| env::var("TREE_SITTER_LOG_GRAPHS").is_ok());

pub static LANGUAGE_FILTER: LazyLock<Option<String>> =
    LazyLock::new(|| env::var("TREE_SITTER_LANGUAGE").ok());

pub static EXAMPLE_INCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE"));

pub static EXAMPLE_EXCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE"));

pub static START_SEED: LazyLock<usize> = LazyLock::new(new_seed);

pub const DEFAULT_EDIT_COUNT: usize = 3;
pub static EDIT_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_EDITS").unwrap_or(DEFAULT_EDIT_COUNT));

pub const DEFAULT_ITERATION_COUNT: usize = 10;
pub static ITERATION_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(DEFAULT_ITERATION_COUNT));

fn int_env_var(name: &'static str) -> Option<usize> {
    env::var(name).ok().and_then(|e| e.parse().ok())
}

fn regex_env_var(name: &'static str) -> Option<Regex> {
    env::var(name).ok().and_then(|e| Regex::new(&e).ok())
}

#[must_use]
pub fn new_seed() -> usize {
    int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
        let mut rng = rand::thread_rng();
        let seed = rng.gen::<usize>();
        info!("Seed: {seed}");
        seed
    })
}

pub struct FuzzOptions {
    pub skipped: Option<Vec<String>>,
    pub subdir: Option<PathBuf>,
    pub edits: usize,
    pub iterations: usize,
    pub include: Option<Regex>,
    pub exclude: Option<Regex>,
    pub log_graphs: bool,
    pub log: bool,
}

pub fn fuzz_language_corpus(
    language: &Language,
    language_name: &str,
    start_seed: usize,
    grammar_dir: &Path,
    options: &mut FuzzOptions,
) {
    fn retain(entry: &mut TestEntry, language_name: &str) -> bool {
        match entry {
            TestEntry::Example { attributes, .. } => {
                attributes.languages[0].is_empty()
                    || attributes
                        .languages
                        .iter()
                        .any(|lang| lang.as_ref() == language_name)
            }
            TestEntry::Group {
                ref mut children, ..
            } => {
                children.retain_mut(|child| retain(child, language_name));
                !children.is_empty()
            }
        }
    }

    let subdir = options.subdir.take().unwrap_or_default();

    let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");

    if !corpus_dir.exists() || !corpus_dir.is_dir() {
        error!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
        return;
    }

    if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
        error!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
        return;
    }

    let mut main_tests = parse_tests(&corpus_dir).unwrap();
    match main_tests {
        TestEntry::Group {
            ref mut children, ..
        } => {
            children.retain_mut(|child| retain(child, language_name));
        }
        TestEntry::Example { .. } => unreachable!(),
    }
    let tests = flatten_tests(
        main_tests,
        options.include.as_ref(),
        options.exclude.as_ref(),
    );

    let get_test_name = |test: &FlattenedTest| format!("{language_name} - {}", test.name);

    let mut skipped = options
        .skipped
        .take()
        .unwrap_or_default()
        .into_iter()
        .chain(tests.iter().filter(|x| x.skip).map(get_test_name))
        .map(|x| (x, 0))
        .collect::<HashMap<String, usize>>();

    let mut failure_count = 0;

    let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
    let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();

    if log_seed {
        info!("  start seed: {start_seed}");
    }

    println!();
    for (test_index, test) in tests.iter().enumerate() {
        let test_name = get_test_name(test);
        if let Some(counter) = skipped.get_mut(test_name.as_str()) {
            println!("  {test_index}. {test_name} - SKIPPED");
            *counter += 1;
            continue;
        }

        println!("  {test_index}. {test_name}");

        let passed = allocations::record_checked(|| {
            let mut log_session = None;
            let mut parser = get_parser(&mut log_session, "log.html");
            parser.set_language(language).unwrap();
            set_included_ranges(&mut parser, &test.input, test.template_delimiters);

            let tree = parser.parse(&test.input, None).unwrap();

            if test.error {
                return true;
            }

            let mut actual_output = tree.root_node().to_sexp();
            if !test.has_fields {
                actual_output = strip_sexp_fields(&actual_output);
            }

            if actual_output != test.output {
                println!("Incorrect initial parse for {test_name}");
                DiffKey::print();
                println!("{}", TestDiff::new(&actual_output, &test.output));
                println!();
                return false;
            }

            true
        })
        .unwrap_or_else(|e| {
            error!("{e}");
            false
        });

        if !passed {
            failure_count += 1;
            continue;
        }

        let mut parser = Parser::new();
        parser.set_language(language).unwrap();
        let tree = parser.parse(&test.input, None).unwrap();
        drop(parser);

        for trial in 0..options.iterations {
            let seed = start_seed + trial;
            let passed = allocations::record_checked(|| {
                let mut rand = Rand::new(seed);
                let mut log_session = None;
                let mut parser = get_parser(&mut log_session, "log.html");
                parser.set_language(language).unwrap();
                let mut tree = tree.clone();
                let mut input = test.input.clone();

                if options.log_graphs {
                    info!("{}\n", String::from_utf8_lossy(&input));
                }

                // Perform a random series of edits and reparse.
                let edit_count = rand.unsigned(options.edits);
                let mut undo_stack = Vec::with_capacity(edit_count);
                for _ in 0..=edit_count {
                    let edit = get_random_edit(&mut rand, &input);
                    undo_stack.push(invert_edit(&input, &edit));
                    perform_edit(&mut tree, &mut input, &edit).unwrap();
                }

                if log_seed {
                    info!("   {test_index}.{trial:<2} seed: {seed}");
                }

                if dump_edits {
                    fs::create_dir_all("fuzz").unwrap();
                    fs::write(
                        Path::new("fuzz")
                            .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
                        &input,
                    )
                    .unwrap();
                }

                if options.log_graphs {
                    info!("{}\n", String::from_utf8_lossy(&input));
                }

                set_included_ranges(&mut parser, &input, test.template_delimiters);
                let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();

                // Check that the new tree is consistent.
                check_consistent_sizes(&tree2, &input);
                if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
                    error!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
                    return false;
                }

                // Undo all of the edits and re-parse again.
                while let Some(edit) = undo_stack.pop() {
                    perform_edit(&mut tree2, &mut input, &edit).unwrap();
                }
                if options.log_graphs {
                    info!("{}\n", String::from_utf8_lossy(&input));
                }

                set_included_ranges(&mut parser, &test.input, test.template_delimiters);
                let tree3 = parser.parse(&input, Some(&tree2)).unwrap();

                // Verify that the final tree matches the expectation from the corpus.
                let mut actual_output = tree3.root_node().to_sexp();
                if !test.has_fields {
                    actual_output = strip_sexp_fields(&actual_output);
                }

                if actual_output != test.output && !test.error {
                    println!("Incorrect parse for {test_name} - seed {seed}");
                    DiffKey::print();
                    println!("{}", TestDiff::new(&actual_output, &test.output));
                    println!();
                    return false;
                }

                // Check that the edited tree is consistent.
                check_consistent_sizes(&tree3, &input);
                if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
                    error!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
                    return false;
                }

                true
            }).unwrap_or_else(|e| {
                error!("{e}");
                false
            });

            if !passed {
                failure_count += 1;
                break;
            }
        }
    }

    if failure_count != 0 {
        info!("{failure_count} {language_name} corpus tests failed fuzzing");
    }

    skipped.retain(|_, v| *v == 0);

    if !skipped.is_empty() {
        info!("Non matchable skip definitions:");
        for k in skipped.keys() {
            info!("  {k}");
        }
        panic!("Non matchable skip definitions need to be removed");
    }
}

pub struct FlattenedTest {
    pub name: String,
    pub input: Vec<u8>,
    pub output: String,
    pub languages: Vec<Box<str>>,
    pub error: bool,
    pub skip: bool,
    pub has_fields: bool,
    pub template_delimiters: Option<(&'static str, &'static str)>,
}

#[must_use]
pub fn flatten_tests(
    test: TestEntry,
    include: Option<&Regex>,
    exclude: Option<&Regex>,
) -> Vec<FlattenedTest> {
    fn helper(
        test: TestEntry,
        include: Option<&Regex>,
        exclude: Option<&Regex>,
        is_root: bool,
        prefix: &str,
        result: &mut Vec<FlattenedTest>,
    ) {
        match test {
            TestEntry::Example {
                mut name,
                input,
                output,
                has_fields,
                attributes,
                ..
            } => {
                if !prefix.is_empty() {
                    name.insert_str(0, " - ");
                    name.insert_str(0, prefix);
                }

                if let Some(include) = include {
                    if !include.is_match(&name) {
                        return;
                    }
                } else if let Some(exclude) = exclude {
                    if exclude.is_match(&name) {
                        return;
                    }
                }

                result.push(FlattenedTest {
                    name,
                    input,
                    output,
                    has_fields,
                    languages: attributes.languages,
                    error: attributes.error,
                    skip: attributes.skip,
                    template_delimiters: None,
                });
            }
            TestEntry::Group {
                mut name, children, ..
            } => {
                if !is_root && !prefix.is_empty() {
                    name.insert_str(0, " - ");
                    name.insert_str(0, prefix);
                }
                for child in children {
                    helper(child, include, exclude, false, &name, result);
                }
            }
        }
    }
    let mut result = Vec::new();
    helper(test, include, exclude, true, "", &mut result);
    result
}