parsuna 0.1.0

Parsuna: recoverable, pull-based parsers with precise errors
Documentation
//! Transpile a `.parsuna` grammar into a tree-sitter `grammar.js`.
//!
//! The emitted grammar is a faithful shape-preserving translation: each
//! rule becomes a tree-sitter rule using `seq`/`choice`/`repeat`/etc.,
//! tokens become `token(...)` wrappers, and skip tokens become the
//! `extras` list. Useful for plugging a parsuna grammar into an editor
//! tool-chain that already speaks tree-sitter.

use std::fmt::Write;

use crate::analysis::AnalyzedGrammar;
use crate::grammar::ir::*;

/// Build the full text of a tree-sitter `grammar.js` for the grammar in `ag`.
///
/// The output consists of a generated-file header, the sanitized grammar
/// name, an `extras` list naming every non-fragment skip token, and a
/// `rules` object. Inside `rules` the non-fragment rules come first, then
/// the fragment rules, then every non-fragment token wrapped in
/// `token(...)`; consecutive entries are separated by a blank line.
pub fn emit(ag: &AnalyzedGrammar) -> String {
    let g = &ag.grammar;
    let mut out = String::new();

    // File header and the opening of the `grammar({...})` call.
    out.push_str("// Generated by parsuna. Do not edit by hand.\n");
    writeln!(out, "// Grammar: {}", g.name).unwrap();
    out.push('\n');
    out.push_str("module.exports = grammar({\n");
    writeln!(out, "  name: {},", js_string(&sanitize_name(&g.name))).unwrap();
    out.push('\n');

    // Skip tokens are listed as extras; fragments are never referenced by name.
    let extras: Vec<String> = g
        .tokens
        .iter()
        .filter(|t| t.skip && !t.is_fragment)
        .map(|t| format!("$.{}", t.name))
        .collect();
    writeln!(out, "  extras: $ => [{}],", extras.join(", ")).unwrap();
    out.push('\n');

    out.push_str("  rules: {\n");

    // Non-fragment rules are emitted ahead of fragment rules, followed by tokens.
    let rule_entries = g
        .rules
        .iter()
        .filter(|r| !r.is_fragment)
        .chain(g.rules.iter().filter(|r| r.is_fragment))
        .map(|r| format!("    {}: $ => {},\n", r.name, render_expr(&r.body)));
    let token_entries = g
        .tokens
        .iter()
        .filter(|t| !t.is_fragment)
        .map(|t| format!("    {}: $ => token({}),\n", t.name, render_token(&t.pattern, g)));

    // Each entry already ends in a newline, so joining on "\n" leaves exactly
    // one blank line between neighbours and none before the first entry.
    let entries: Vec<String> = rule_entries.chain(token_entries).collect();
    out.push_str(&entries.join("\n"));

    out.push_str("  },\n");
    out.push_str("});\n");
    out
}

/// Render a rule body as a tree-sitter DSL expression.
///
/// Token and rule references both become `$.<name>`. Sequences,
/// alternatives, and the optional/star/plus operators map onto `seq`,
/// `choice`, `optional`, `repeat`, and `repeat1`; the empty expression
/// becomes `blank()`.
fn render_expr(e: &Expr) -> String {
    // Wrap an already-rendered argument list in a call to `combinator`.
    let call = |combinator: &str, args: String| format!("{}({})", combinator, args);

    match e {
        Expr::Empty => String::from("blank()"),
        Expr::Token(name) | Expr::Rule(name) => format!("$.{}", name),
        Expr::Seq(xs) => call(
            "seq",
            xs.iter().map(render_expr).collect::<Vec<_>>().join(", "),
        ),
        Expr::Alt(xs) => call(
            "choice",
            xs.iter().map(render_expr).collect::<Vec<_>>().join(", "),
        ),
        Expr::Opt(x) => call("optional", render_expr(x)),
        Expr::Star(x) => call("repeat", render_expr(x)),
        Expr::Plus(x) => call("repeat1", render_expr(x)),
    }
}

/// Render a token pattern as a tree-sitter DSL expression.
///
/// Literals become quoted JS strings, character classes become regex
/// literals, and the structural operators map onto `seq`, `choice`,
/// `optional`, `repeat`, and `repeat1`. A reference to another token is
/// replaced by the rendering of that token's own pattern, looked up in `g`.
///
/// # Panics
///
/// Panics if a referenced token name cannot be resolved through `g.token`.
fn render_token(p: &TokenPattern, g: &Grammar) -> String {
    // Wrap an already-rendered argument list in a call to `combinator`.
    let call = |combinator: &str, args: String| format!("{}({})", combinator, args);

    match p {
        TokenPattern::Empty => String::from("''"),
        TokenPattern::Literal(s) => js_string(s),
        TokenPattern::Class(cc) => render_class(cc),
        TokenPattern::Ref(name) => {
            // References are expanded in place rather than emitted as `$.name`.
            let target = g.token(name).expect("reference validated upstream");
            render_token(&target.pattern, g)
        }
        TokenPattern::Seq(xs) => call(
            "seq",
            xs.iter()
                .map(|x| render_token(x, g))
                .collect::<Vec<_>>()
                .join(", "),
        ),
        TokenPattern::Alt(xs) => call(
            "choice",
            xs.iter()
                .map(|x| render_token(x, g))
                .collect::<Vec<_>>()
                .join(", "),
        ),
        TokenPattern::Opt(x) => call("optional", render_token(x, g)),
        TokenPattern::Star(x) => call("repeat", render_token(x, g)),
        TokenPattern::Plus(x) => call("repeat1", render_token(x, g)),
    }
}

/// Render a character class as a JS regex literal of the form `/[...]/`.
///
/// A negated class starts with `^`. Single characters and both ends of
/// each range are passed through `escape_class_char`; ranges are written
/// as `lo-hi`.
fn render_class(cc: &CharClass) -> String {
    // Build the members first, then add the delimiters in one step.
    let mut members = String::new();
    for item in &cc.items {
        match item {
            ClassItem::Char(c) => members.push_str(&escape_class_char(*c)),
            ClassItem::Range(lo, hi) => {
                members.push_str(&escape_class_char(*lo));
                members.push('-');
                members.push_str(&escape_class_char(*hi));
            }
        }
    }

    let negation = if cc.negated { "^" } else { "" };
    format!("/[{}{}]/", negation, members)
}

/// Escape one code point for use inside a regex character class (`[...]`)
/// that is itself written as a `/.../` regex literal.
///
/// * Characters with meaning inside a class or the literal (`\`, `[`, `]`,
///   `/`, `^`, `-`) are backslash-escaped. `[` is included because regex
///   dialects with nested classes treat a bare `[` as opening a new class.
/// * Newline, carriage return, and tab use their short escapes.
/// * Other printable ASCII (U+0020..=U+007E) is emitted verbatim.
/// * Everything else is written as a numeric escape: `\uXXXX` up to U+FFFF
///   and `\u{X...}` above it. The four-digit form only consumes four hex
///   digits, so a longer code point written that way would be read as a
///   different character followed by stray digits.
/// * Values that are not valid `char`s also fall back to the numeric escape.
fn escape_class_char(cp: u32) -> String {
    // Numeric escape that stays unambiguous for code points above the BMP.
    fn hex_escape(cp: u32) -> String {
        if cp <= 0xFFFF {
            format!("\\u{:04x}", cp)
        } else {
            format!("\\u{{{:x}}}", cp)
        }
    }

    match char::from_u32(cp) {
        None => hex_escape(cp),
        Some('\\') => "\\\\".into(),
        Some('[') => "\\[".into(),
        Some(']') => "\\]".into(),
        Some('/') => "\\/".into(),
        Some('^') => "\\^".into(),
        Some('-') => "\\-".into(),
        Some('\n') => "\\n".into(),
        Some('\r') => "\\r".into(),
        Some('\t') => "\\t".into(),
        Some(c) if (0x20..=0x7E).contains(&cp) => c.to_string(),
        Some(_) => hex_escape(cp),
    }
}

/// Quote `s` as a single-quoted JavaScript string literal.
///
/// The quote character, backslash, newline, carriage return, and tab use
/// their short backslash escapes; any other character below U+0020 is
/// written as `\uXXXX`. All remaining characters are copied unchanged.
fn js_string(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('\'');

    for ch in s.chars() {
        // Characters that have a dedicated short escape.
        let short_escape = match ch {
            '\'' => Some("\\'"),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };

        if let Some(esc) = short_escape {
            quoted.push_str(esc);
        } else if u32::from(ch) < 0x20 {
            // Remaining control characters get a four-digit numeric escape.
            quoted.push_str(&format!("\\u{:04x}", u32::from(ch)));
        } else {
            quoted.push(ch);
        }
    }

    quoted.push('\'');
    quoted
}

/// Turn an arbitrary grammar name into an identifier-like string.
///
/// Every character that is not an ASCII letter, ASCII digit, or `_` is
/// replaced by `_`. If the result is empty or begins with a digit, a
/// leading `_` is added.
fn sanitize_name(s: &str) -> String {
    let mut ident: String = s
        .chars()
        .map(|c| {
            if c.is_ascii_alphanumeric() || c == '_' {
                c
            } else {
                '_'
            }
        })
        .collect();

    // An empty name or a leading digit gets an underscore prefix.
    let needs_prefix = match ident.chars().next() {
        None => true,
        Some(first) => first.is_ascii_digit(),
    };
    if needs_prefix {
        ident.insert(0, '_');
    }
    ident
}