// shape-vm 0.1.8
//
// Stack-based bytecode virtual machine for the Shape programming language
// Documentation
//! Build script to extract grammar rules from pest file
//!
//! This generates a Rust file containing all rule names from the Shape
//! pest grammar, which is used for coverage analysis.

use std::collections::BTreeSet;
use std::fs;
use std::path::Path;

/// Build-script entry point: the only work is emitting the grammar rule table.
fn main() {
    generate_grammar_features()
}

/// Writes `grammar_features.rs` into Cargo's `OUT_DIR`.
///
/// The generated file defines `PEST_RULES`, a sorted `&[&str]` holding every
/// rule name that `extract_pest_rules` finds in the Shape pest grammar. If the
/// grammar file is missing, the table is generated empty and a Cargo build
/// warning is emitted instead of failing the build.
///
/// # Panics
///
/// Panics if `OUT_DIR` is not set (i.e. the script is not being run by Cargo)
/// or if the generated file cannot be written.
fn generate_grammar_features() {
    // Path to the pest grammar file (relative to shape-vm). Kept in one place
    // so the existence check and the `rerun-if-changed` directive cannot drift.
    const PEST_GRAMMAR: &str = "../shape-ast/src/shape.pest";

    let out_dir = std::env::var("OUT_DIR").expect("OUT_DIR is set by Cargo for build scripts");
    let dest_path = Path::new(&out_dir).join("grammar_features.rs");
    let pest_path = Path::new(PEST_GRAMMAR);

    let rules = if pest_path.exists() {
        extract_pest_rules(pest_path)
    } else {
        // Fallback if file not found (shouldn't happen in normal builds).
        // Build-script stderr is hidden by Cargo unless the script fails, so
        // report through the `cargo:warning=` directive to make this visible.
        println!(
            "cargo:warning=pest grammar not found at {}; PEST_RULES will be empty",
            pest_path.display()
        );
        BTreeSet::new()
    };

    // One indented, quoted, comma-terminated array entry per rule name.
    let rules_array: String = rules
        .iter()
        .map(|r| format!("    \"{}\",", r))
        .collect::<Vec<_>>()
        .join("\n");

    let generated = format!(
        r#"// Auto-generated from shape.pest - DO NOT EDIT
// This file contains all grammar rule names extracted from the pest grammar.
// Generated by shape-vm/build.rs

/// All grammar rules extracted from shape.pest
pub const PEST_RULES: &[&str] = &[
{}
];
"#,
        rules_array
    );

    fs::write(&dest_path, generated).expect("Failed to write grammar_features.rs");

    // Tell Cargo to re-run if the pest file changes
    println!("cargo:rerun-if-changed={}", PEST_GRAMMAR);
}

/// Collect the names of all rules defined in a pest grammar file.
///
/// The scan is line-based. A line counts as a rule definition when the text
/// before its first `=` is a valid rule name and the text after it (ignoring
/// surrounding whitespace) starts with one of the rule-body openers:
/// - `{`  (normal)
/// - `_{` (silent)
/// - `@{` (atomic)
/// - `${` (compound atomic)
/// - `!{` (non-atomic)
///
/// Blank lines and lines starting with `//` are ignored. Names are returned
/// in a `BTreeSet`, so they are unique and sorted.
///
/// # Panics
///
/// Panics if the file at `path` cannot be read as UTF-8 text.
fn extract_pest_rules(path: &Path) -> BTreeSet<String> {
    // Every token sequence that may open a rule body right after the `=`.
    const BODY_OPENERS: [&str; 5] = ["{", "_{", "@{", "${", "!{"];

    let grammar = fs::read_to_string(path).expect("Failed to read pest grammar");

    grammar
        .lines()
        .map(str::trim)
        // Drop blank lines and comment lines up front.
        .filter(|text| !text.is_empty() && !text.starts_with("//"))
        .filter_map(|text| {
            // Split at the first `=`; lines without one are not definitions.
            let (lhs, rhs) = text.split_once('=')?;
            let name = lhs.trim();
            let body = rhs.trim();

            let is_definition = is_valid_rule_name(name)
                && BODY_OPENERS.iter().any(|&opener| body.starts_with(opener));

            is_definition.then(|| name.to_string())
        })
        .collect()
}

/// Returns `true` when `s` has the shape of a pest rule name.
///
/// A name must be non-empty, begin with an ASCII letter or `_`, and continue
/// with only ASCII letters, ASCII digits, or `_`.
fn is_valid_rule_name(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        // The first character is stricter than the rest: no leading digit.
        Some(first) if first.is_ascii_alphabetic() || first == '_' => {
            chars.all(|rest| rest.is_ascii_alphanumeric() || rest == '_')
        }
        // Empty string or an invalid leading character.
        _ => false,
    }
}