use std::fmt::Write;
use std::path::PathBuf;
use crate::codegen::common::screaming_snake;
use crate::codegen::EmittedFile;
use crate::lowering::{DispatchLeaf, DispatchTree, Op, StateTable};
/// Generates the C backend for `st`: a public header plus its implementation.
///
/// Both files share a stem taken from the grammar name; an empty grammar name
/// falls back to `parser`. The returned vector holds `<stem>.h` first and
/// `<stem>.c` second.
pub fn emit(st: &StateTable) -> Vec<EmittedFile> {
    let stem = match st.grammar_name.as_str() {
        "" => String::from("parser"),
        name => name.to_owned(),
    };
    // Upper-cased stem, handed to the emitters as the prefix for C macros/enumerators.
    let upper = stem.to_ascii_uppercase();

    let mut header = String::new();
    emit_header(&mut header, st, &stem, &upper);

    let mut source = String::new();
    emit_impl(&mut source, st, &stem, &upper);

    let file = |ext: &str, contents: String| EmittedFile {
        path: PathBuf::from(format!("{stem}.{ext}")),
        contents,
    };
    vec![file("h", header), file("c", source)]
}
/// Writes the public C header for the grammar into `h`.
///
/// `stem` prefixes every exported type and function; `upper` prefixes the
/// include guard and all enumerators. The header declares, in order: the
/// position/span/token/error/event structs, the reader callback type, the
/// opaque parser handle, the token-kind and rule-kind enums, the name lookup
/// functions, one pair of constructors per entry rule, and the
/// `next`/`parser_free` functions.
fn emit_header(h: &mut String, st: &StateTable, stem: &str, upper: &str) {
    // Appends one formatted line to `h`; with no arguments, appends a blank line.
    macro_rules! line {
        () => {
            writeln!(h).unwrap()
        };
        ($($fmt:tt)+) => {
            writeln!(h, $($fmt)+).unwrap()
        };
    }

    // File banner with a short usage sketch.
    line!("/* Generated by parsuna. Do not edit by hand.");
    line!(" *");
    line!(" * Pull-based, recoverable parser for the `{stem}` grammar. This header");
    line!(" * is self-contained: it exposes the {stem}_* API and has no dependency");
    line!(" * on the internal parsuna runtime. Build against the emitted {stem}.c,");
    line!(" * which pulls in the header-only runtime internally.");
    line!(" *");
    line!(" * Usage: p = {stem}_parser_new_<rule>_from_string(src, len);");
    line!(" * or p = {stem}_parser_new_<rule>_from_read_fn(fn, ctx);");
    line!(" * while ({stem}_next(p, &ev)) {{ ... }}");
    line!(" * {stem}_parser_free(p);");
    line!(" */");

    // Include guard, standard headers, and the C++ linkage opener.
    line!("#ifndef {upper}_H");
    line!("#define {upper}_H");
    line!("#include <stddef.h>");
    line!("#include <stdint.h>");
    line!("#ifdef __cplusplus");
    line!("extern \"C\" {{");
    line!("#endif");
    line!();

    // Plain data types.
    line!("/* Source position: byte offset plus 1-based line/column. */");
    line!("typedef struct {{ uint32_t offset, line, column; }} {stem}_Pos;");
    line!();
    line!("/* Half-open span [start, end) over the source. */");
    line!("typedef struct {{ {stem}_Pos start, end; }} {stem}_Span;");
    line!();
    line!("/* A lexed token: kind, span, and matched text.");
    line!(" *");
    line!(" * In *reader* mode ({stem}_parser_new_<rule>_from_read_fn) `text` is an");
    line!(" * allocation owned by the parser until the next call to {stem}_next and is");
    line!(" * NUL-terminated; in *string* mode ({stem}_parser_new_<rule>_from_string)");
    line!(" * `text` points directly into the caller's source buffer (zero-copy) and");
    line!(" * is NOT NUL-terminated. Use `text_len` in both modes. */");
    line!("typedef struct {{ int kind; {stem}_Span span; char *text; size_t text_len; }} {stem}_Token;");
    line!();
    line!("/* A recoverable error. `message` is owned by the parser until the next");
    line!(" * call to {stem}_next. */");
    line!("typedef struct {{ char *message; {stem}_Span span; }} {stem}_Error;");
    line!();

    // Event tag and event record.
    line!("/* Event discriminator: ENTER/EXIT delimit a rule subtree, TOKEN carries");
    line!(" * a consumed token, ERROR is a recoverable diagnostic. */");
    line!("typedef enum {{");
    line!(" {upper}_EV_ENTER,");
    line!(" {upper}_EV_EXIT,");
    line!(" {upper}_EV_TOKEN,");
    line!(" {upper}_EV_ERROR");
    line!("}} {stem}_EventTag;");
    line!();
    line!("/* A single parse event; inspect `tag` to decide which payload field to read. */");
    line!("typedef struct {{");
    line!(" {stem}_EventTag tag;");
    line!(" int rule;");
    line!(" {stem}_Pos pos;");
    line!(" {stem}_Token token;");
    line!(" {stem}_Error error;");
    line!("}} {stem}_Event;");
    line!();

    // Reader callback and opaque handle.
    line!("/* Reader callback: fill up to `max` bytes into `out`, return number of");
    line!(" * bytes written (0 for EOF, negative to signal an error). */");
    line!("typedef int (*{stem}_ReadFn)(void *ctx, char *out, int max);");
    line!();
    line!("/* Opaque parser state. Obtain via {stem}_parser_new_<rule>_from_string or");
    line!(" * {stem}_parser_new_<rule>_from_read_fn; free via {stem}_parser_free. */");
    line!("typedef struct {stem}_Parser {stem}_Parser;");
    line!();

    // Token kinds: two fixed sentinels followed by one enumerator per grammar token.
    line!("/* Token kinds this grammar can emit. TK_EOF/TK_ERROR are runtime");
    line!(" * sentinels; the rest come from the grammar's `token` declarations. */");
    line!("typedef enum {{");
    line!(" {upper}_TK_EOF = 0,");
    line!(" {upper}_TK_ERROR = -1,");
    for tok in &st.tokens {
        line!(" {upper}_TK_{} = {},", tok.name, tok.kind);
    }
    line!("}} {stem}_TokenKind;");
    line!();

    // Rule kinds: numbered by position in `rule_kinds`.
    line!("/* One value per non-fragment rule. Attached via `rule` to");
    line!(" * {upper}_EV_ENTER/{upper}_EV_EXIT events so callers can identify each subtree. */");
    line!("typedef enum {{");
    for (idx, rule) in st.rule_kinds.iter().enumerate() {
        line!(" {upper}_RK_{} = {idx},", screaming_snake(rule));
    }
    if st.rule_kinds.is_empty() {
        // Placeholder enumerator so the enum body is never empty.
        line!(" {upper}_RK__none");
    }
    line!("}} {stem}_RuleKind;");
    line!();

    // Name lookup helpers.
    line!("/* Grammar-declared name for a token kind (or \"?\" if unknown). */");
    line!("const char *{stem}_token_kind_name(int k);");
    line!("/* Grammar-declared name for a rule kind (or \"?\" if unknown). */");
    line!("const char *{stem}_rule_kind_name(int k);");
    line!();

    // One string-backed and one reader-backed constructor per entry rule.
    for (name, _) in &st.entry_states {
        line!("/* Zero-copy parser over the `{name}` rule, reading from an in-memory");
        line!(" * buffer. The caller keeps ownership of `src`, which must remain live");
        line!(" * and unmodified until {stem}_parser_free. `len` is the number of bytes.");
        line!(" * Token.text fields point directly into `src` (not NUL-terminated). */");
        line!("{stem}_Parser *{stem}_parser_new_{name}_from_string(const char *src, size_t len);");
        line!();
        line!("/* Streaming parser over the `{name}` rule, pulling bytes through `read_fn`.");
        line!(" * The runtime buffers and copies as needed; Token.text fields are owned");
        line!(" * and NUL-terminated. Returns a heap-allocated handle; free with");
        line!(" * {stem}_parser_free. */");
        line!("{stem}_Parser *{stem}_parser_new_{name}_from_read_fn({stem}_ReadFn read_fn, void *ctx);");
    }
    line!();

    // Driving and teardown.
    line!("/* Produce the next event into `*out`. Returns 1 on fresh event, 0 once");
    line!(" * the input is fully consumed. String fields inside `*out` are owned");
    line!(" * by `p` and stay valid only until the next call. */");
    line!("int {stem}_next({stem}_Parser *p, {stem}_Event *out);");
    line!();
    line!("/* Free every allocation owned by `p`, including `p` itself. */");
    line!("void {stem}_parser_free({stem}_Parser *p);");
    line!();

    // Close the C++ linkage block and the include guard.
    line!("#ifdef __cplusplus");
    line!("}}");
    line!("#endif");
    line!("#endif");
}
/// Writes the C implementation file for the grammar into `c`.
///
/// Emits the includes and the `PARSUNA_*` configuration macros that precede
/// the `parsuna_rt.h` include, then size checks between the public `{stem}_*`
/// structs and the runtime's types, and finally delegates to the section
/// emitters (name tables, DFA, FIRST/SYNC tables, skip predicate, driver,
/// public API) in that order.
fn emit_impl(c: &mut String, st: &StateTable, stem: &str, upper: &str) {
    writeln!(c, "#include \"{stem}.h\"").unwrap();
    c.push_str("#include <stdlib.h>\n#include <string.h>\n\n");

    // Configuration macros are written ahead of the runtime include.
    writeln!(c, "#define PARSUNA_K {}", st.k).unwrap();
    writeln!(c, "#define PARSUNA_EOF_KIND {upper}_TK_EOF").unwrap();
    writeln!(c, "#define PARSUNA_ERROR_KIND {upper}_TK_ERROR").unwrap();
    c.push_str("#include \"parsuna_rt.h\"\n\n");

    c.push_str("/* Compile-time check that the public types line up byte-for-byte\n");
    c.push_str(" * with the runtime's internal ones, so pointer casts are sound. */\n");
    for ty in ["Pos", "Span", "Token", "Error", "Event"] {
        writeln!(
            c,
            "_Static_assert(sizeof({stem}_{ty}) == sizeof({ty}), \"{ty} layout mismatch\");"
        )
        .unwrap();
    }
    c.push('\n');

    emit_name_tables(c, st, stem, upper);
    emit_dfa(c, st);
    emit_tables(c, st, upper);
    emit_skip(c, st, upper);
    emit_step(c, st, stem, upper);
    emit_public_api(c, st, stem, upper);
}
/// Emits `{stem}_token_kind_name` and `{stem}_rule_kind_name`, the two C
/// functions that map a kind value back to its grammar-declared name.
///
/// Both are a `switch` over the enumerators with a `"?"` default; the token
/// variant additionally covers the `ERROR` and `EOF` sentinels.
fn emit_name_tables(c: &mut String, st: &StateTable, stem: &str, upper: &str) {
    /// Writes one lookup function: signature, the prepared `case` lines, the
    /// `"?"` default, the closing braces and a trailing blank line.
    fn lookup_fn(out: &mut String, fn_name: &str, cases: &str) {
        write!(
            out,
            "const char *{fn_name}(int k) {{\n switch (k) {{\n{cases} default: return \"?\";\n }}\n}}\n\n"
        )
        .unwrap();
    }

    let mut token_cases = String::new();
    for tok in &st.tokens {
        writeln!(
            token_cases,
            " case {upper}_TK_{0}: return \"{0}\";",
            tok.name
        )
        .unwrap();
    }
    writeln!(token_cases, " case {upper}_TK_ERROR: return \"ERROR\";").unwrap();
    writeln!(token_cases, " case {upper}_TK_EOF: return \"EOF\";").unwrap();
    lookup_fn(c, &format!("{stem}_token_kind_name"), &token_cases);

    let mut rule_cases = String::new();
    for rule in &st.rule_kinds {
        writeln!(
            rule_cases,
            " case {upper}_RK_{}: return \"{rule}\";",
            screaming_snake(rule)
        )
        .unwrap();
    }
    lookup_fn(c, &format!("{stem}_rule_kind_name"), &rule_cases);
}
/// Emits the lexer DFA as three C definitions: the flattened transition table
/// `dfa_trans` (one source line per state), the per-state `dfa_accept` table,
/// and the `dfa_config` record tying them to the start state.
fn emit_dfa(c: &mut String, st: &StateTable) {
    let dfa = &st.lexer_dfa;

    c.push_str("static const uint32_t dfa_trans[] = {\n");
    for state in &dfa.states {
        c.push(' ');
        for (col, target) in state.trans.iter().enumerate() {
            // The entry at index 255 closes the row without a trailing space.
            let sep = if col == 255 { "," } else { ", " };
            write!(c, "{target}{sep}").unwrap();
        }
        c.push('\n');
    }
    c.push_str("};\n");

    c.push_str("static const uint16_t dfa_accept[] = {\n ");
    let count = dfa.states.len();
    for (idx, state) in dfa.states.iter().enumerate() {
        // States without an accept value are written as 0.
        let sep = if idx + 1 == count { "," } else { ", " };
        write!(c, "{}{sep}", state.accept.unwrap_or(0)).unwrap();
    }
    c.push_str("\n};\n");

    writeln!(
        c,
        "static const DfaConfig dfa_config = {{ {}u, dfa_trans, dfa_accept }};\n",
        dfa.start
    )
    .unwrap();
}
/// Emits the FIRST and SYNC lookup tables as static C arrays.
///
/// Each FIRST set `i` becomes one `FIRST_{i}_{j}` array per token sequence
/// (terminated by `SENTINEL`) plus a `FIRST_{i}` array of pointers to those
/// rows (terminated by `NULL`). Each SYNC set `i` becomes a single
/// `SENTINEL`-terminated `SYNC_{i}` array.
fn emit_tables(c: &mut String, st: &StateTable, upper: &str) {
    for (set_idx, alternatives) in st.first_sets.iter().enumerate() {
        let mut row_names = Vec::with_capacity(alternatives.len());
        for (seq_idx, seq) in alternatives.iter().enumerate() {
            let mut items: Vec<String> =
                seq.iter().map(|t| c_token_name(st, upper, *t)).collect();
            items.push("SENTINEL".to_string());

            let row = format!("FIRST_{set_idx}_{seq_idx}");
            writeln!(c, "static const int {row}[] = {{ {} }};", items.join(", ")).unwrap();
            row_names.push(row);
        }

        write!(c, "static const int *const FIRST_{set_idx}[] = {{").unwrap();
        for row in &row_names {
            write!(c, " {row},").unwrap();
        }
        c.push_str(" NULL };\n");
    }

    for (set_idx, set) in st.sync_sets.iter().enumerate() {
        let mut items: Vec<String> = set.iter().map(|t| c_token_name(st, upper, *t)).collect();
        items.push("SENTINEL".to_string());
        writeln!(
            c,
            "static const int SYNC_{set_idx}[] = {{ {} }};",
            items.join(", ")
        )
        .unwrap();
    }
    c.push('\n');
}
/// Emits the C predicate `is_skip(kind)`, which is true exactly for the token
/// kinds flagged `skip` in the state table.
///
/// With no skip tokens the body just discards `kind` and returns 0.
fn emit_skip(c: &mut String, st: &StateTable, upper: &str) {
    let mut condition = String::new();
    for tok in st.tokens.iter().filter(|t| t.skip) {
        if !condition.is_empty() {
            condition.push_str(" || ");
        }
        write!(condition, "kind == {}", c_token_name(st, upper, tok.kind)).unwrap();
    }

    let body = if condition.is_empty() {
        " (void)kind; return 0;".to_string()
    } else {
        format!(" return {condition};")
    };
    write!(c, "static int is_skip(int kind) {{\n{body}\n}}\n\n").unwrap();
}
/// Emits the C `drive` function: a loop that keeps switching on the current
/// state while the event queue is empty and the state is not `TERMINATED`,
/// then stores the state back into the parser.
///
/// Every state in the table becomes one `case`, labelled with a comment and
/// filled with the C statements for its ops.
fn emit_step(c: &mut String, st: &StateTable, _stem: &str, upper: &str) {
    c.push_str("static void drive(Parser *p) {\n");
    c.push_str(" int cur = get_state(p);\n");
    c.push_str(" while (queue_empty(p) && cur != TERMINATED) {\n");
    c.push_str(" switch (cur) {\n");

    for state in st.states.values() {
        let id = state.id;
        writeln!(c, " case {id}: {{ /* {} */", state.label).unwrap();
        for op in &state.ops {
            emit_op(c, st, upper, op, id);
        }
        c.push_str(" break;\n }\n");
    }

    c.push_str(" }\n }\n set_state(p, cur);\n}\n\n");
}
/// Returns the C enumerator name for token kind `kind`.
///
/// `0` and `-1` map to the `{upper}_TK_EOF` and `{upper}_TK_ERROR` sentinels;
/// any other value is looked up in `st.tokens`.
///
/// # Panics
///
/// Panics if `kind` is neither a sentinel nor the kind of a declared token.
fn c_token_name(st: &StateTable, upper: &str, kind: i16) -> String {
    match kind {
        0 => format!("{upper}_TK_EOF"),
        -1 => format!("{upper}_TK_ERROR"),
        _ => {
            let tok = st
                .tokens
                .iter()
                .find(|t| t.kind == kind)
                .unwrap_or_else(|| panic!("unknown token id {} while emitting C backend", kind));
            format!("{upper}_TK_{}", tok.name)
        }
    }
}
/// Emits the C statement(s) for a single op inside a state's `case` body.
///
/// `self_id` is the id of the state that owns `op`; `Op::Star` pushes it as
/// the return target so the loop state runs again after the body.
///
/// # Panics
///
/// Panics if an `Enter`/`Exit` op names a rule kind outside `st.rule_kinds`,
/// or (via `c_token_name`) if an expected token kind is unknown.
fn emit_op(c: &mut String, st: &StateTable, upper: &str, op: &Op, self_id: u32) {
    match op {
        Op::Enter(k) => match st.rule_kinds.get(*k as usize) {
            Some(rule) => writeln!(
                c,
                " emit_enter(p, {}_RK_{});",
                upper,
                screaming_snake(rule)
            )
            .unwrap(),
            None => panic!("unknown rule kind id {} while emitting C backend", k),
        },
        Op::Exit(k) => match st.rule_kinds.get(*k as usize) {
            Some(rule) => writeln!(
                c,
                " emit_exit(p, {}_RK_{});",
                upper,
                screaming_snake(rule)
            )
            .unwrap(),
            None => panic!("unknown rule kind id {} while emitting C backend", k),
        },
        Op::Expect {
            kind,
            token_name,
            sync,
        } => writeln!(
            c,
            " try_consume(p, {}, SYNC_{}, \"{}\");",
            c_token_name(st, upper, *kind),
            sync,
            token_name
        )
        .unwrap(),
        Op::PushRet(target) => writeln!(c, " push_ret(p, {target});").unwrap(),
        Op::Jump(target) => writeln!(c, " cur = {target};").unwrap(),
        Op::Ret => c.push_str(" cur = pop_ret(p);\n"),
        // Repetition: return to this same state after each pass through the body.
        Op::Star { first, body, next } => writeln!(
            c,
            " if (matches_first(p, FIRST_{})) {{ push_ret(p, {}); cur = {}; }} else {{ cur = {}; }}",
            first, self_id, body, next
        )
        .unwrap(),
        // Optional: run the body at most once, then continue at `next` either way.
        Op::Opt { first, body, next } => writeln!(
            c,
            " if (matches_first(p, FIRST_{})) {{ push_ret(p, {}); cur = {}; }} else {{ cur = {}; }}",
            first, next, body, next
        )
        .unwrap(),
        Op::Dispatch { tree, sync, next } => {
            emit_dispatch_tree(c, st, upper, tree, *sync, *next, " ")
        }
    }
}
/// Emits a lookahead dispatch tree as nested C `switch` statements.
///
/// A leaf becomes a single braced statement group. A switch node inspects
/// `look(p, depth).kind`, emits one `case` per arm (leaf arms inline, nested
/// trees recursively with deeper indentation) and finishes with the `default`
/// leaf. `ind` is the indentation prefix for this level; `sync` and `next` are
/// passed through to every leaf.
fn emit_dispatch_tree(
    c: &mut String,
    st: &StateTable,
    upper: &str,
    tree: &DispatchTree,
    sync: u32,
    next: u32,
    ind: &str,
) {
    let (depth, arms, default) = match tree {
        DispatchTree::Leaf(leaf) => {
            c.push_str(ind);
            c.push_str("{ ");
            emit_leaf_inline(c, leaf, sync, next);
            c.push_str("}\n");
            return;
        }
        DispatchTree::Switch {
            depth,
            arms,
            default,
        } => (depth, arms, default),
    };

    writeln!(c, "{ind}switch (look(p, {depth}).kind) {{").unwrap();
    let case_ind = format!("{ind} ");
    let nested_ind = format!("{case_ind} ");

    for (kind, sub) in arms {
        let label = c_token_name(st, upper, *kind);
        if let DispatchTree::Leaf(leaf) = sub {
            // Leaf arms fit on one line together with their `break`.
            write!(c, "{case_ind}case {label}: {{ ").unwrap();
            emit_leaf_inline(c, leaf, sync, next);
            c.push_str("} break;\n");
        } else {
            writeln!(c, "{case_ind}case {label}: {{").unwrap();
            emit_dispatch_tree(c, st, upper, sub, sync, next, &nested_ind);
            writeln!(c, "{case_ind}}} break;").unwrap();
        }
    }

    write!(c, "{case_ind}default: {{ ").unwrap();
    emit_leaf_inline(c, default, sync, next);
    c.push_str("} break;\n");
    writeln!(c, "{ind}}}").unwrap();
}
fn emit_leaf_inline(c: &mut String, leaf: &DispatchLeaf, sync: u32, next: u32) {
match leaf {
DispatchLeaf::Arm(t) => {
write!(c, "push_ret(p, {next}); cur = {t}; ").unwrap()
}
DispatchLeaf::Fallthrough => write!(c, "cur = {next}; ").unwrap(),
DispatchLeaf::Error => write!(
c,
"cur = {next}; error_here(p, \"unexpected token\"); recover_to(p, SYNC_{sync}); "
)
.unwrap(),
}
}
/// Emits the definitions of the public C API declared in the header: one
/// `_from_read_fn` and one `_from_string` constructor per entry rule, followed
/// by `{stem}_next` and `{stem}_parser_free`.
///
/// Each constructor heap-allocates a runtime `Parser`, initialises it with the
/// entry state id, and returns it cast to the opaque `{stem}_Parser*`. The
/// generated constructors return `NULL` when `malloc` fails instead of handing
/// a null pointer to the runtime initialiser.
fn emit_public_api(c: &mut String, st: &StateTable, stem: &str, upper: &str) {
    /// Writes the shared constructor body: allocate, bail out on allocation
    /// failure, run `init_call`, return the handle, close the function.
    fn ctor_body(c: &mut String, stem: &str, init_call: &str) {
        writeln!(c, " Parser *p = (Parser*)malloc(sizeof *p);").unwrap();
        // Without this guard the init call below would dereference NULL on OOM.
        writeln!(c, " if (!p) return NULL;").unwrap();
        writeln!(c, " {init_call}").unwrap();
        writeln!(c, " return ({stem}_Parser*)p;").unwrap();
        writeln!(c, "}}").unwrap();
    }

    // `upper` is not needed by anything emitted here; presumably it is accepted
    // only to keep the signature in line with the other emitters.
    let _ = upper;

    for (name, id) in &st.entry_states {
        writeln!(
            c,
            "{stem}_Parser *{stem}_parser_new_{name}_from_read_fn({stem}_ReadFn read_fn, void *ctx) {{"
        )
        .unwrap();
        ctor_body(
            c,
            stem,
            &format!("parser_init_from_read_fn(p, {id}, read_fn, ctx);"),
        );

        writeln!(
            c,
            "{stem}_Parser *{stem}_parser_new_{name}_from_string(const char *src, size_t len) {{"
        )
        .unwrap();
        ctor_body(
            c,
            stem,
            &format!("parser_init_from_string(p, {id}, src, len);"),
        );
    }
    writeln!(c).unwrap();

    // `next` forwards to the runtime, casting the public types to the runtime's.
    writeln!(
        c,
        "int {stem}_next({stem}_Parser *p, {stem}_Event *out) {{"
    )
    .unwrap();
    writeln!(c, " return parser_next((Parser*)p, (Event*)out);").unwrap();
    writeln!(c, "}}").unwrap();
    writeln!(c).unwrap();

    // `parser_free` accepts NULL, then tears down the runtime state and frees the handle.
    writeln!(c, "void {stem}_parser_free({stem}_Parser *p) {{").unwrap();
    writeln!(c, " if (!p) return;").unwrap();
    writeln!(c, " parser_destroy((Parser*)p);").unwrap();
    writeln!(c, " free(p);").unwrap();
    writeln!(c, "}}").unwrap();
}