use std::fmt::Write;
use std::path::PathBuf;
use crate::codegen::common::pascal;
use crate::codegen::EmittedFile;
use crate::lowering::{DispatchLeaf, DispatchTree, Op, StateTable};
/// Generates the Rust backend for `st` and returns it as a single file.
///
/// The sections are appended to one buffer in a fixed order: prelude, token
/// kinds, rule kinds, re-exports, lexer configuration, parse tables, parser
/// type alias, the `Drive` implementation and finally the public entry points.
/// The file is named after the grammar, falling back to `parser.rs` when the
/// grammar name is empty.
pub fn emit(st: &StateTable) -> Vec<EmittedFile> {
    let mut contents = String::new();

    emit_prelude(&mut contents, st);
    emit_token_kinds(&mut contents, st);
    emit_rule_kinds(&mut contents, st);
    emit_reexports(&mut contents);
    emit_lexer_config(&mut contents, st);
    emit_tables(&mut contents, st);
    emit_parser(&mut contents);
    emit_step(&mut contents, st);
    emit_public_api(&mut contents, st);

    // An unnamed grammar still needs a usable file name.
    let stem = match st.grammar_name.as_str() {
        "" => "parser",
        name => name,
    };
    let path = PathBuf::from(format!("{}.rs", stem));

    vec![EmittedFile { path, contents }]
}
/// Appends the generated file's header: module-level docs, the lint
/// allowance, and the `use` lines the rest of the generated code relies on.
///
/// The state table is currently unused; the header is the same for every
/// grammar.
fn emit_prelude(s: &mut String, _st: &StateTable) {
    // Lines that contain no format placeholders, emitted verbatim.
    const HEADER: &[&str] = &[
        "//! Generated by parsuna — do not edit by hand.",
        "//!",
        "//! Pull-based, recoverable parser. Call one of the `parse_*_from_str` /",
        "//! `parse_*_from_reader` constructors and iterate the resulting [`Parser`]",
        "//! to walk the parse as a flat [`Event`] stream.",
        "#![allow(dead_code, unused_imports)]",
        "",
        "use std::io::Read;",
    ];

    for line in HEADER {
        s.push_str(line);
        s.push('\n');
    }
    writeln!(
        s,
        "use parsuna_rt::{{DfaConfig, LexerBackend, Scanner, StreamingLexer, TERMINATED}};"
    )
    .unwrap();
    s.push('\n');
}
/// Appends the `TokenKind` enum, its `id` accessor and its
/// `parsuna_rt::TokenKindEnum` implementation.
///
/// `Eof` (0) and `Error` (-1) are always emitted. Every entry of `st.tokens`
/// contributes one variant named `pascal(name)` whose discriminant is the
/// token's `kind`, and one `name()` arm returning the grammar-level name.
fn emit_token_kinds(s: &mut String, st: &StateTable) {
    /// Appends each entry of `lines` followed by a newline.
    fn push_lines(out: &mut String, lines: &[&str]) {
        for line in lines {
            out.push_str(line);
            out.push('\n');
        }
    }

    // Enum definition.
    push_lines(
        s,
        &[
            "/// The set of token kinds this grammar can emit.",
            "///",
            "/// `Eof` marks end-of-input and `Error` is produced by the lexer when",
            "/// no pattern matches at the current position; the other variants come",
            "/// from the grammar's `token` declarations.",
            "#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]",
            "#[repr(i16)]",
        ],
    );
    writeln!(s, "pub enum TokenKind {{").unwrap();
    push_lines(s, &[" Eof = 0,", " Error = -1,"]);
    for tok in &st.tokens {
        let variant = pascal(&tok.name);
        writeln!(s, " {} = {},", variant, tok.kind).unwrap();
    }
    writeln!(s, "}}").unwrap();
    s.push('\n');

    // Inherent accessor for the numeric discriminant.
    writeln!(s, "impl TokenKind {{").unwrap();
    writeln!(s, " /// Numeric discriminant, equal to `self as i16`.").unwrap();
    writeln!(s, " pub const fn id(self) -> i16 {{ self as i16 }}").unwrap();
    writeln!(s, "}}").unwrap();
    s.push('\n');

    // Runtime trait implementation: display names and the EOF constant.
    writeln!(s, "impl parsuna_rt::TokenKindEnum for TokenKind {{").unwrap();
    writeln!(s, " fn name(self) -> &'static str {{").unwrap();
    writeln!(s, " match self {{").unwrap();
    push_lines(
        s,
        &[
            " TokenKind::Eof => \"EOF\",",
            " TokenKind::Error => \"ERROR\",",
        ],
    );
    for tok in &st.tokens {
        let variant = pascal(&tok.name);
        writeln!(s, " TokenKind::{} => \"{}\",", variant, tok.name).unwrap();
    }
    writeln!(s, " }}").unwrap();
    writeln!(s, " }}").unwrap();
    writeln!(s, " const EOF: Self = Self::Eof;").unwrap();
    writeln!(s, "}}").unwrap();
    s.push('\n');
}
/// Appends the `RuleKind` enum, its `id` accessor and its
/// `parsuna_rt::RuleKindEnum` implementation.
///
/// Variants are named `pascal(rule_name)` and numbered by their position in
/// `st.rule_kinds`; `name()` maps each variant back to the grammar-level name.
///
/// With an empty rule list the `name()` body is an arm-less `match self {}`,
/// so no special case is needed for it here.
/// NOTE(review): an empty list also produces a zero-variant `#[repr(u16)]`
/// enum, which rustc rejects — presumably lowering guarantees at least one
/// rule; confirm against the caller.
fn emit_rule_kinds(s: &mut String, st: &StateTable) {
    writeln!(s, "/// One variant per non-fragment rule in the grammar.").unwrap();
    writeln!(s, "///").unwrap();
    writeln!(
        s,
        "/// Attached to [`Event::Enter`] and [`Event::Exit`] so consumers can"
    )
    .unwrap();
    writeln!(s, "/// tell which rule a subtree corresponds to.").unwrap();
    writeln!(s, "#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]").unwrap();
    writeln!(s, "#[repr(u16)]").unwrap();
    writeln!(s, "pub enum RuleKind {{").unwrap();
    for (i, n) in st.rule_kinds.iter().enumerate() {
        writeln!(s, " {} = {},", pascal(n), i).unwrap();
    }
    writeln!(s, "}}").unwrap();
    writeln!(s).unwrap();

    writeln!(s, "impl RuleKind {{").unwrap();
    writeln!(s, " /// Numeric discriminant, equal to `self as u16`.").unwrap();
    writeln!(s, " pub const fn id(self) -> u16 {{ self as u16 }}").unwrap();
    writeln!(s, "}}").unwrap();
    writeln!(s).unwrap();

    writeln!(s, "impl parsuna_rt::RuleKindEnum for RuleKind {{").unwrap();
    writeln!(s, " fn name(self) -> &'static str {{").unwrap();
    writeln!(s, " match self {{").unwrap();
    for n in &st.rule_kinds {
        writeln!(s, " RuleKind::{} => \"{}\",", pascal(n), n).unwrap();
    }
    writeln!(s, " }}").unwrap();
    writeln!(s, " }}").unwrap();
    writeln!(s, "}}").unwrap();
    writeln!(s).unwrap();
}
/// Appends the runtime re-exports and the grammar-specific `Event` / `Token`
/// type aliases.
///
/// The emitted text is fixed and does not depend on the grammar. Each emitted
/// item is preceded by a one-line doc comment.
fn emit_reexports(s: &mut String) {
    s.push_str("\n\n");
    s.push_str("/// Runtime types re-exported so callers need not name `parsuna_rt` directly.\n");
    s.push_str("pub use parsuna_rt::{Span, Pos, Error};\n");
    s.push_str("/// A parse event specialized to this grammar's [`TokenKind`] and [`RuleKind`].\n");
    s.push_str("pub type Event<'a> = parsuna_rt::Event<'a, TokenKind, RuleKind>;\n");
    s.push_str("/// A lexed token with this grammar's [`TokenKind`].\n");
    s.push_str("pub type Token<'a> = parsuna_rt::Token<'a, TokenKind>;\n\n");
}
/// Appends the lexer tables: `DFA_TRANS`, `DFA_ACCEPT`, the public
/// `LEXER_CONFIG` constant that ties them together, and `SKIP_KINDS`.
///
/// `DFA_TRANS` is written one DFA state per line, entries separated by `", "`,
/// with no separator after the final entry of the final state. The position of
/// that final entry is taken from the iterators rather than from a fixed row
/// width, so rows of any length produce a well-formed array.
///
/// `DFA_ACCEPT` holds one entry per state; states without an accept value are
/// written as `0`.
/// NOTE(review): `0` is also the `Eof` token kind — presumably the runtime
/// treats it as "not accepting"; confirm against `parsuna_rt`.
///
/// `SKIP_KINDS` lists the `kind` of every token flagged `skip`.
fn emit_lexer_config(s: &mut String, st: &StateTable) {
    let dfa = &st.lexer_dfa;
    let n_states = dfa.states.len();

    writeln!(s).unwrap();
    writeln!(s, "static DFA_TRANS: &[u32] = &[").unwrap();
    let mut rows = dfa.states.iter().peekable();
    while let Some(row) = rows.next() {
        let is_last_row = rows.peek().is_none();
        write!(s, " ").unwrap();
        let mut cells = row.trans.iter().peekable();
        while let Some(target) = cells.next() {
            write!(s, "{}", target).unwrap();
            // Every entry is followed by a separator except the very last
            // one of the whole table.
            let is_final_entry = is_last_row && cells.peek().is_none();
            if !is_final_entry {
                write!(s, ", ").unwrap();
            }
        }
        writeln!(s).unwrap();
    }
    writeln!(s, "];").unwrap();
    writeln!(s).unwrap();

    writeln!(s, "static DFA_ACCEPT: &[i16] = &[").unwrap();
    let accept_list = dfa
        .states
        .iter()
        .map(|state| state.accept.unwrap_or(0).to_string())
        .collect::<Vec<_>>()
        .join(", ");
    write!(s, " ").unwrap();
    s.push_str(&accept_list);
    writeln!(s).unwrap();
    writeln!(s, "];").unwrap();
    writeln!(s).unwrap();

    writeln!(s, "/// Packed lexer DFA for this grammar.").unwrap();
    writeln!(s, "///").unwrap();
    writeln!(
        s,
        "/// Exposed so callers can plug it into a custom [`LexerBackend`] if the"
    )
    .unwrap();
    writeln!(
        s,
        "/// default `Scanner`/`StreamingLexer` don't fit their use case."
    )
    .unwrap();
    writeln!(s, "pub const LEXER_CONFIG: DfaConfig = DfaConfig {{").unwrap();
    writeln!(s, " states: {},", n_states).unwrap();
    writeln!(s, " start: {},", dfa.start).unwrap();
    writeln!(s, " trans: DFA_TRANS,").unwrap();
    writeln!(s, " accept: DFA_ACCEPT,").unwrap();
    writeln!(s, "}};").unwrap();
    writeln!(s).unwrap();

    let skip_list = st
        .tokens
        .iter()
        .filter(|t| t.skip)
        .map(|t| t.kind.to_string())
        .collect::<Vec<_>>()
        .join(", ");
    write!(s, "static SKIP_KINDS: &[i16] = &[").unwrap();
    s.push_str(&skip_list);
    writeln!(s, "];").unwrap();
    writeln!(s).unwrap();
}
/// Appends the parser's static tables: the lookahead constant `K`, one
/// `ENTRY_<NAME>` constant per entry state, the `FIRST_i_j` / `FIRST_i`
/// lookahead sets and the `SYNC_i` recovery sets.
///
/// Token ids in the sets are rendered as `TokenKind::…` paths through
/// [`token_variant`], which panics on an id that is not in `st.tokens`.
fn emit_tables(s: &mut String, st: &StateTable) {
    s.push('\n');
    writeln!(s, "/// Lookahead required to disambiguate every alternative (LL(k)).").unwrap();
    writeln!(s, "pub const K: usize = {};", st.k).unwrap();
    for (name, id) in &st.entry_states {
        let upper = name.to_uppercase();
        writeln!(s, "const ENTRY_{}: u32 = {};", upper, id).unwrap();
    }
    s.push('\n');

    for (set_idx, alternatives) in st.first_sets.iter().enumerate() {
        // One static per token sequence in the set…
        for (alt_idx, seq) in alternatives.iter().enumerate() {
            let variants: Vec<String> = seq.iter().map(|kind| token_variant(st, *kind)).collect();
            write!(s, "static FIRST_{}_{}: &[TokenKind] = &[", set_idx, alt_idx).unwrap();
            s.push_str(&variants.join(", "));
            writeln!(s, "];").unwrap();
        }
        // …and one static that lists all of them.
        let members: Vec<String> = (0..alternatives.len())
            .map(|alt_idx| format!("FIRST_{}_{}", set_idx, alt_idx))
            .collect();
        write!(s, "static FIRST_{}: &[&[TokenKind]] = &[", set_idx).unwrap();
        s.push_str(&members.join(", "));
        writeln!(s, "];").unwrap();
    }
    s.push('\n');

    for (set_idx, kinds) in st.sync_sets.iter().enumerate() {
        let variants: Vec<String> = kinds.iter().map(|kind| token_variant(st, *kind)).collect();
        write!(s, "static SYNC_{}: &[TokenKind] = &[", set_idx).unwrap();
        s.push_str(&variants.join(", "));
        writeln!(s, "];").unwrap();
    }
    s.push('\n');
}
/// Returns the `TokenKind::…` path for the token with numeric id `kind`.
///
/// `0` and `-1` map to the built-in `Eof` and `Error` variants; any other id
/// is looked up in `st.tokens` and named via `pascal`.
///
/// # Panics
///
/// Panics if `kind` is neither built-in nor present in `st.tokens`.
fn token_variant(st: &StateTable, kind: i16) -> String {
    match kind {
        0 => "TokenKind::Eof".to_string(),
        -1 => "TokenKind::Error".to_string(),
        _ => {
            let tok = st
                .tokens
                .iter()
                .find(|tok| tok.kind == kind)
                .unwrap_or_else(|| {
                    panic!("unknown token id {} while emitting Rust backend", kind)
                });
            format!("TokenKind::{}", pascal(&tok.name))
        }
    }
}
/// Returns the `RuleKind::…` path for the rule at index `kind` of
/// `st.rule_kinds`.
///
/// # Panics
///
/// Panics if `kind` is out of range.
fn rule_variant(st: &StateTable, kind: u16) -> String {
    match st.rule_kinds.get(usize::from(kind)) {
        Some(name) => format!("RuleKind::{}", pascal(name)),
        None => panic!("unknown rule kind id {} while emitting Rust backend", kind),
    }
}
/// Appends the `Grammar` marker struct and the `Parser` type alias.
///
/// The emitted text is fixed and does not depend on the grammar.
fn emit_parser(s: &mut String) {
    const GRAMMAR_AND_PARSER: &str = r#"
/// Zero-sized marker type that carries the generated dispatch logic via
/// [`parsuna_rt::Drive`]. You never construct one directly — it's a
/// type-level parameter to [`Parser`].
pub struct Grammar;
/// Parser alias pinning the grammar and lookahead. `L` is any
/// [`LexerBackend`]; the generated `parse_*_from_str`/`parse_*_from_reader`
/// helpers build a parser with either [`Scanner`] or [`StreamingLexer`].
pub type Parser<'a, L> = parsuna_rt::Parser<'a, L, K, Grammar>;
"#;

    s.push_str(GRAMMAR_AND_PARSER);
}
/// Builds a `match` pattern (`A | B | …`) from an LL(1) FIRST set.
///
/// Every sequence in `first` must hold exactly one token id. The ids are
/// sorted and de-duplicated before being rendered with [`token_variant`].
///
/// # Panics
///
/// Panics if any sequence does not have length 1, or if an id is unknown to
/// [`token_variant`].
fn kind_pattern(st: &StateTable, first: &[Vec<i16>]) -> String {
    let mut kinds = Vec::with_capacity(first.len());
    for seq in first {
        assert_eq!(seq.len(), 1, "kind_pattern expects LL(1) singletons");
        kinds.push(seq[0]);
    }
    kinds.sort_unstable();
    kinds.dedup();

    let mut pattern = String::new();
    for (idx, kind) in kinds.into_iter().enumerate() {
        if idx > 0 {
            pattern.push_str(" | ");
        }
        pattern.push_str(&token_variant(st, kind));
    }
    pattern
}
/// Appends the generated statements for one `op` of state `self_id`, each
/// line prefixed with `ind`.
///
/// Simple ops become a single call on `p` or an assignment to `cur`. `Star`
/// and `Opt` become a lookahead test that either enters `body` or jumps to
/// `next`; they differ only in the return target pushed before entering the
/// body (`self_id` for `Star`, `next` for `Opt`). `Dispatch` is delegated to
/// [`emit_dispatch_tree`].
fn emit_op(s: &mut String, table: &StateTable, self_id: u32, op: &Op, ind: &str) {
    /// Emits "if lookahead matches FIRST set `first`: push `ret_to` and go to
    /// `body`; otherwise go to `next`".
    fn branch_into_body(
        s: &mut String,
        table: &StateTable,
        first: u32,
        ind: &str,
        ret_to: impl std::fmt::Display,
        body: impl std::fmt::Display,
        next: impl std::fmt::Display,
    ) {
        emit_lookahead_branch(
            s,
            table,
            first,
            ind,
            |out, pad| {
                writeln!(out, "{}p.push_ret({}); cur = {};", pad, ret_to, body).unwrap();
            },
            |out, pad| {
                writeln!(out, "{}cur = {};", pad, next).unwrap();
            },
            None,
        );
    }

    match op {
        Op::Enter(rule) => {
            let variant = rule_variant(table, *rule);
            writeln!(s, "{}p.enter({});", ind, variant).unwrap();
        }
        Op::Exit(rule) => {
            let variant = rule_variant(table, *rule);
            writeln!(s, "{}p.exit({});", ind, variant).unwrap();
        }
        Op::Expect {
            kind,
            token_name,
            sync,
        } => {
            let variant = token_variant(table, *kind);
            writeln!(
                s,
                "{}p.expect({}, SYNC_{}, \"expected {}\");",
                ind, variant, sync, token_name
            )
            .unwrap();
        }
        Op::PushRet(target) => {
            writeln!(s, "{}p.push_ret({});", ind, target).unwrap();
        }
        Op::Jump(target) => {
            writeln!(s, "{}cur = {};", ind, target).unwrap();
        }
        Op::Ret => {
            writeln!(s, "{}cur = p.ret();", ind).unwrap();
        }
        // A loop returns to this same state after each iteration.
        Op::Star { first, body, next } => {
            branch_into_body(s, table, *first, ind, self_id, body, next);
        }
        // An optional body returns straight to the continuation.
        Op::Opt { first, body, next } => {
            branch_into_body(s, table, *first, ind, next, body, next);
        }
        Op::Dispatch { tree, sync, next } => {
            emit_dispatch_tree(s, table, tree, *sync, *next, ind);
        }
    }
}
/// Appends a two-way branch on the lookahead against FIRST set `first`.
///
/// When `table.k` is 1 the branch is a `match` on `p.look(0).kind` whose
/// pattern comes from [`kind_pattern`]; otherwise it is an
/// `if p.matches_first(FIRST_<first>)`. `on_match` and `on_miss` are each
/// called once with the buffer and the indentation to use for the branch body.
/// The final parameter is ignored.
fn emit_lookahead_branch(
    s: &mut String,
    table: &StateTable,
    first: u32,
    ind: &str,
    on_match: impl FnOnce(&mut String, &str),
    on_miss: impl FnOnce(&mut String, &str),
    _unused: Option<()>,
) {
    // General LL(k): defer to the runtime's sequence matcher.
    if table.k != 1 {
        let body_ind = format!("{} ", ind);
        writeln!(s, "{}if p.matches_first(FIRST_{}) {{", ind, first).unwrap();
        on_match(s, &body_ind);
        writeln!(s, "{}}} else {{", ind).unwrap();
        on_miss(s, &body_ind);
        writeln!(s, "{}}}", ind).unwrap();
        return;
    }

    // LL(1): a plain match on the next token kind.
    let pat = kind_pattern(table, &table.first_sets[first as usize]);
    let arm_ind = format!("{} ", ind);
    writeln!(s, "{}match p.look(0).kind {{", ind).unwrap();
    writeln!(s, "{} {} => {{", ind, pat).unwrap();
    on_match(s, &arm_ind);
    writeln!(s, "{} }}", ind).unwrap();
    writeln!(s, "{} _ => {{", ind).unwrap();
    on_miss(s, &arm_ind);
    writeln!(s, "{} }}", ind).unwrap();
    writeln!(s, "{}}}", ind).unwrap();
}
/// Appends the code for a dispatch decision tree, prefixed with `ind`.
///
/// A leaf is emitted directly via [`emit_dispatch_leaf`]. A switch becomes a
/// `match p.look(depth).kind` with one arm per entry of `arms`: leaf arms are
/// written on a single line, nested switches recurse with deeper indentation.
/// The `default` leaf is emitted as the trailing `_` arm.
fn emit_dispatch_tree(
    s: &mut String,
    table: &StateTable,
    tree: &DispatchTree,
    sync: u32,
    next: u32,
    ind: &str,
) {
    let (depth, arms, default) = match tree {
        DispatchTree::Leaf(leaf) => {
            emit_dispatch_leaf(s, leaf, sync, next, ind);
            return;
        }
        DispatchTree::Switch {
            depth,
            arms,
            default,
        } => (depth, arms, default),
    };

    let arm_ind = format!("{} ", ind);
    let nested_ind = format!("{} ", arm_ind);

    writeln!(s, "{}match p.look({}).kind {{", ind, depth).unwrap();
    for (kind, subtree) in arms {
        let pat = token_variant(table, *kind);
        if let DispatchTree::Leaf(leaf) = subtree {
            write!(s, "{}{} => {{ ", arm_ind, pat).unwrap();
            emit_leaf_inline(s, leaf, sync, next);
            writeln!(s, "}}").unwrap();
        } else {
            writeln!(s, "{}{} => {{", arm_ind, pat).unwrap();
            emit_dispatch_tree(s, table, subtree, sync, next, &nested_ind);
            writeln!(s, "{}}}", arm_ind).unwrap();
        }
    }
    write!(s, "{}_ => {{ ", arm_ind).unwrap();
    emit_leaf_inline(s, default, sync, next);
    writeln!(s, "}}").unwrap();
    writeln!(s, "{}}}", ind).unwrap();
}
fn emit_dispatch_leaf(s: &mut String, leaf: &DispatchLeaf, sync: u32, next: u32, ind: &str) {
match leaf {
DispatchLeaf::Arm(t) => writeln!(s, "{}p.push_ret({}); cur = {};", ind, next, t).unwrap(),
DispatchLeaf::Fallthrough => writeln!(s, "{}cur = {};", ind, next).unwrap(),
DispatchLeaf::Error => {
writeln!(s, "{}cur = {};", ind, next).unwrap();
writeln!(s, "{}p.error_here(\"unexpected token\");", ind).unwrap();
writeln!(s, "{}p.recover_to(SYNC_{});", ind, sync).unwrap();
}
}
}
fn emit_leaf_inline(s: &mut String, leaf: &DispatchLeaf, sync: u32, next: u32) {
match leaf {
DispatchLeaf::Arm(t) => write!(s, "p.push_ret({}); cur = {}; ", next, t).unwrap(),
DispatchLeaf::Fallthrough => write!(s, "cur = {}; ", next).unwrap(),
DispatchLeaf::Error => {
write!(
s,
"cur = {}; p.error_here(\"unexpected token\"); p.recover_to(SYNC_{}); ",
next, sync
)
.unwrap();
}
}
}
/// Appends the `parsuna_rt::Drive<K>` implementation for `Grammar`.
///
/// The impl contains:
/// * `HAS_SKIPS` / `is_skip`, derived from the tokens flagged `skip`;
/// * `drive`, a loop that matches on the current state id and runs that
///   state's ops (rendered by [`emit_op`]) until the parser's queue is
///   non-empty or the state is `TERMINATED`, then stores the state back.
fn emit_step(s: &mut String, table: &StateTable) {
    // Indentation handed to `emit_op` for statements inside a state arm.
    const OP_INDENT: &str = " ";

    let skip_variants: Vec<String> = table
        .tokens
        .iter()
        .filter(|tok| tok.skip)
        .map(|tok| token_variant(table, tok.kind))
        .collect();
    let has_skips = !skip_variants.is_empty();

    // Associated types and the skip predicate.
    writeln!(s, "impl parsuna_rt::Drive<K> for Grammar {{").unwrap();
    writeln!(s, " type TokenKind = TokenKind;").unwrap();
    writeln!(s, " type RuleKind = RuleKind;").unwrap();
    writeln!(s, " const HAS_SKIPS: bool = {};", has_skips).unwrap();
    s.push('\n');
    writeln!(s, " #[inline(always)]").unwrap();
    writeln!(s, " fn is_skip(kind: TokenKind) -> bool {{").unwrap();
    let is_skip_body = if has_skips {
        format!(" matches!(kind, {})", skip_variants.join(" | "))
    } else {
        " let _ = kind; false".to_string()
    };
    s.push_str(&is_skip_body);
    s.push('\n');
    writeln!(s, " }}").unwrap();
    s.push('\n');

    // The state-machine driver.
    writeln!(s, " #[inline]").unwrap();
    writeln!(
        s,
        " fn drive<'a, L: LexerBackend<'a, Self::TokenKind>>(p: &mut parsuna_rt::Parser<'a, L, K, Self>) {{"
    )
    .unwrap();
    writeln!(s, " let mut cur = p.state();").unwrap();
    writeln!(
        s,
        " while p.queue_is_empty() && cur != TERMINATED {{"
    )
    .unwrap();
    writeln!(s, " match cur {{").unwrap();
    for state in table.states.values() {
        writeln!(s, " {} => {{ // {}", state.id, state.label).unwrap();
        for op in &state.ops {
            emit_op(s, table, state.id, op, OP_INDENT);
        }
        writeln!(s, " }}").unwrap();
    }
    writeln!(s, " _ => unreachable!(\"unknown state\"),").unwrap();
    writeln!(s, " }}").unwrap();
    writeln!(s, " }}").unwrap();
    writeln!(s, " p.set_state(cur);").unwrap();
    writeln!(s, " }}").unwrap();
    writeln!(s, "}}").unwrap();
    s.push('\n');
}
/// Appends the public constructors: for every entry state, a
/// `parse_<name>_from_str` and a `parse_<name>_from_reader` function that
/// build a `Parser` starting at `ENTRY_<NAME>`.
///
/// Emits nothing at all when the grammar has no entry states.
fn emit_public_api(s: &mut String, st: &StateTable) {
    if st.entry_states.is_empty() {
        return;
    }

    s.push_str("\n\n");
    for (name, _) in &st.entry_states {
        let upper = name.to_uppercase();

        // In-memory variant.
        writeln!(
            s,
            "/// Parse the `{name}` rule from an in-memory string."
        )
        .unwrap();
        writeln!(s, "///").unwrap();
        writeln!(s, "/// Zero-copy: tokens borrow their text from `src`.").unwrap();
        writeln!(
            s,
            "pub fn parse_{name}_from_str<'a>(src: &'a str) -> Parser<'a, Scanner<'a, TokenKind>> {{",
            name = name
        )
        .unwrap();
        writeln!(
            s,
            " Parser::new(Scanner::new(src, &LEXER_CONFIG), ENTRY_{upper})",
            upper = upper
        )
        .unwrap();
        writeln!(s, "}}").unwrap();
        s.push('\n');

        // Streaming variant.
        writeln!(
            s,
            "/// Parse the `{name}` rule from any [`Read`] source."
        )
        .unwrap();
        writeln!(s, "///").unwrap();
        writeln!(
            s,
            "/// Streaming: tokens own their text; memory use stays bounded regardless of"
        )
        .unwrap();
        writeln!(s, "/// input size.").unwrap();
        writeln!(
            s,
            "pub fn parse_{name}_from_reader<R: Read>(reader: R) -> Parser<'static, StreamingLexer<R, TokenKind>> {{",
            name = name
        )
        .unwrap();
        writeln!(
            s,
            " Parser::new(StreamingLexer::new(reader, &LEXER_CONFIG), ENTRY_{upper})",
            upper = upper
        )
        .unwrap();
        writeln!(s, "}}").unwrap();
        s.push('\n');
    }
}