use std::fmt::Write;
use std::path::PathBuf;
use crate::codegen::EmittedFile;
use crate::lowering::{DispatchLeaf, DispatchTree, Op, StateTable};
/// Generate the C# backend output for the lowered grammar `st`.
///
/// Returns a single [`EmittedFile`] located at `<Namespace>/Grammar.cs`, where
/// `<Namespace>` is the pascal-cased grammar name. The file consists of a fixed
/// header followed by the constant, DFA, table and grammar sections written by
/// the sibling `emit_*` helpers, in that order.
pub fn emit(st: &StateTable) -> Vec<EmittedFile> {
    // Apply the fallback *after* pascal-casing: a grammar name made up only of
    // separators (`_`, `-`) collapses to an empty string, which would otherwise
    // produce `namespace ;` and an absolute `/Grammar.cs` output path.
    let mut ns = pascal_case(&st.grammar_name);
    if ns.is_empty() {
        ns.push_str("Parser");
    }

    let mut s = String::new();
    s.push_str(concat!(
        "// Generated by parsuna — do not edit by hand.\n",
        "//\n",
        "// Pull-based, recoverable parser. Call one of the static Grammar.ParseXxx\n",
        "// entry points and iterate the returned Parser (or call NextEvent) to\n",
        "// walk the parse as a flat Event stream. Requires the Parsuna.Runtime library.\n",
        "#nullable enable\n",
        "using System;\n",
        "using System.IO;\n",
        "using System.Text;\n",
        "using Parsuna.Runtime;\n",
        "\n",
    ));
    // Writing into a `String` cannot fail, so the unwrap is infallible.
    writeln!(&mut s, "namespace {};", ns).unwrap();
    s.push('\n');

    emit_constants(&mut s, st);
    emit_dfa(&mut s, st);
    emit_tables(&mut s, st);
    emit_grammar(&mut s, st);

    // Build the path from components rather than by string concatenation.
    let path = PathBuf::from(&ns).join("Grammar.cs");
    vec![EmittedFile { path, contents: s }]
}
/// Convert a `snake_case` or `kebab-case` identifier to `PascalCase`.
///
/// The input is cut at every `_` and `-`; the separators are dropped and the
/// first character of each non-empty piece is upper-cased. All remaining
/// characters are copied through unchanged.
fn pascal_case(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    for word in s.split(|c: char| matches!(c, '_' | '-')) {
        let mut rest = word.chars();
        // Empty pieces (leading, trailing or doubled separators) add nothing.
        if let Some(head) = rest.next() {
            // Upper-casing may expand to more than one char (e.g. 'ß' -> "SS").
            result.extend(head.to_uppercase());
            result.push_str(rest.as_str());
        }
    }
    result
}
/// Append the C# `TokenKind` and `RuleKind` enums, plus a `DisplayName`
/// extension class for each, to `s`.
///
/// Token members are named after the pascal-cased, lower-cased token name and
/// carry the token's `kind` as their value; rule members are numbered by their
/// position in `st.rule_kinds`.
fn emit_constants(s: &mut String, st: &StateTable) {
    // Each token's enum member name is used by both the enum body and the
    // DisplayName switch, so derive it once up front.
    let token_idents: Vec<String> = st
        .tokens
        .iter()
        .map(|t| pascal_case(&t.name.to_lowercase()))
        .collect();

    // --- TokenKind enum -------------------------------------------------
    s.push_str(concat!(
        "/// <summary>Token kinds this grammar can emit. <c>Eof</c>/<c>Error</c> are runtime\n",
        "/// sentinels; the rest come from the grammar's <c>token</c> declarations.</summary>\n",
        "public enum TokenKind : short {\n",
        " Eof = 0,\n",
        " Error = -1,\n",
    ));
    for (tok, ident) in st.tokens.iter().zip(&token_idents) {
        writeln!(s, " {} = {},", ident, tok.kind).unwrap();
    }

    // --- TokenKind display names ----------------------------------------
    s.push_str(concat!(
        "}\n",
        "\n",
        "public static class TokenKindEx {\n",
        " public static string DisplayName(this TokenKind k) => k switch {\n",
        " TokenKind.Eof => \"EOF\",\n",
        " TokenKind.Error => \"ERROR\",\n",
    ));
    for (tok, ident) in st.tokens.iter().zip(&token_idents) {
        writeln!(s, " TokenKind.{} => \"{}\",", ident, tok.name).unwrap();
    }

    // --- RuleKind enum --------------------------------------------------
    s.push_str(concat!(
        " _ => \"?\",\n",
        " };\n",
        "}\n",
        "\n",
        "/// <summary>One variant per non-fragment grammar rule. Attached to enter/exit\n",
        "/// events so consumers can identify the subtree each pair delimits.</summary>\n",
        "public enum RuleKind : ushort {\n",
    ));
    for (index, rule) in st.rule_kinds.iter().enumerate() {
        writeln!(s, " {} = {},", pascal_case(rule), index).unwrap();
    }

    // --- RuleKind display names -----------------------------------------
    s.push_str(concat!(
        "}\n",
        "\n",
        "public static class RuleKindEx {\n",
        " public static string DisplayName(this RuleKind k) => k switch {\n",
    ));
    for rule in &st.rule_kinds {
        writeln!(s, " RuleKind.{} => \"{}\",", pascal_case(rule), rule).unwrap();
    }
    s.push_str(concat!(
        " _ => \"?\",\n",
        " };\n",
        "}\n",
        "\n",
    ));
}
/// Render token id `kind` as a C# expression of the form
/// `(short)TokenKind.<Member>`.
///
/// Ids `0` and `-1` map to the `Eof` and `Error` members; any other id is
/// looked up in `st.tokens`.
///
/// # Panics
///
/// Panics if `kind` is neither `0`/`-1` nor the `kind` of any entry in
/// `st.tokens`.
fn token_short(st: &StateTable, kind: i16) -> String {
    let member = match kind {
        0 => "Eof".to_string(),
        -1 => "Error".to_string(),
        _ => st
            .tokens
            .iter()
            .find(|t| t.kind == kind)
            .map(|t| pascal_case(&t.name.to_lowercase()))
            .unwrap_or_else(|| panic!("unknown token id {} while emitting C# backend", kind)),
    };
    format!("(short)TokenKind.{}", member)
}
/// Render rule-kind index `kind` as a C# expression of the form
/// `(int)RuleKind.<Member>`, where the member is the pascal-cased entry of
/// `st.rule_kinds` at that index.
///
/// # Panics
///
/// Panics if `kind` is out of range for `st.rule_kinds`.
fn rule_id(st: &StateTable, kind: u16) -> String {
    match st.rule_kinds.get(usize::from(kind)) {
        Some(name) => format!("(int)RuleKind.{}", pascal_case(name)),
        None => panic!("unknown rule kind id {} while emitting C# backend", kind),
    }
}
/// Append the C# `DfaTables` class to `s`.
///
/// The class holds the flattened `Trans` array (one source line per DFA
/// state), the `Accept` array (one entry per state, `0` where the state has no
/// accept value) and a `DfaConfig` named `Lexer` built from the DFA's start
/// state and those two arrays.
fn emit_dfa(s: &mut String, st: &StateTable) {
    let lexer = &st.lexer_dfa;

    s.push_str("internal static class DfaTables {\n");
    s.push_str(" public static readonly uint[] Trans = new uint[] {\n");
    for row in &lexer.states {
        s.push_str(" ");
        for (col, target) in row.trans.iter().enumerate() {
            // The entry at index 255 closes the row with a bare comma; every
            // other entry is followed by ", ".
            let sep = if col == 255 { "," } else { ", " };
            write!(s, "{}u{}", target, sep).unwrap();
        }
        s.push('\n');
    }
    s.push_str(" };\n");

    s.push_str(" public static readonly ushort[] Accept = new ushort[] {\n");
    s.push_str(" ");
    let state_count = lexer.states.len();
    for (idx, row) in lexer.states.iter().enumerate() {
        let accept = row.accept.unwrap_or(0);
        // Only the final entry omits the trailing space after its comma.
        let sep = if idx + 1 == state_count { "," } else { ", " };
        write!(s, "{}{}", accept, sep).unwrap();
    }
    s.push('\n');
    s.push_str(" };\n");

    writeln!(
        s,
        " public static readonly DfaConfig Lexer = new DfaConfig({}u, Trans, Accept);",
        lexer.start
    )
    .unwrap();
    s.push_str("}\n");
    s.push('\n');
}
/// Append the C# `Tables` class to `s`.
///
/// The class contains the constant `K` (from `st.k`), one `First<i>` jagged
/// array per entry of `st.first_sets`, one `Sync<i>` array per entry of
/// `st.sync_sets`, and an `IsSkip` predicate that is true for exactly the
/// tokens flagged `skip` (or constantly `false` when there are none).
fn emit_tables(s: &mut String, st: &StateTable) {
    s.push_str("internal static class Tables {\n");
    writeln!(s, " public const int K = {};", st.k).unwrap();

    for (index, alternatives) in st.first_sets.iter().enumerate() {
        let mut rows: Vec<String> = Vec::new();
        for seq in alternatives.iter() {
            let kinds: Vec<String> = seq.iter().map(|t| token_short(st, *t)).collect();
            rows.push(format!("new short[] {{ {} }}", kinds.join(", ")));
        }
        writeln!(
            s,
            " public static readonly short[][] First{} = new short[][] {{ {} }};",
            index,
            rows.join(", ")
        )
        .unwrap();
    }

    for (index, set) in st.sync_sets.iter().enumerate() {
        let kinds: Vec<String> = set.iter().map(|t| token_short(st, *t)).collect();
        writeln!(
            s,
            " public static readonly short[] Sync{} = new short[] {{ {} }};",
            index,
            kinds.join(", ")
        )
        .unwrap();
    }

    // One `k == <kind>` comparison per skip token, OR-ed together.
    let mut skip_terms: Vec<String> = Vec::new();
    for tok in st.tokens.iter() {
        if tok.skip {
            skip_terms.push(format!("k == {}", token_short(st, tok.kind)));
        }
    }
    let skip_expr = if skip_terms.is_empty() {
        "false".to_string()
    } else {
        skip_terms.join(" || ")
    };
    writeln!(s, " public static bool IsSkip(short k) => {};", skip_expr).unwrap();

    s.push_str("}\n");
    s.push('\n');
}
/// Append the public C# `Grammar` class to `s`.
///
/// The class contains one `Entry<Name>` constant per entry state, the shared
/// `ParserConfig`, a private `FromStream` factory, a pair of `Parse<Name>`
/// overloads (stream and string) per entry state, and finally the `Drive`
/// method written by `emit_drive`.
fn emit_grammar(s: &mut String, st: &StateTable) {
    s.push_str(concat!(
        "/// <summary>Grammar-specific entry points. Thin wrapper around Parsuna.Runtime.Parser.</summary>\n",
        "public static class Grammar {\n",
    ));

    for (name, id) in &st.entry_states {
        let cap = pascal_case(name);
        writeln!(s, " private const int Entry{} = {};", cap, id).unwrap();
    }

    s.push_str(concat!(
        "\n",
        " private static readonly ParserConfig Config = new ParserConfig(\n",
        " Tables.K,\n",
        " (short)TokenKind.Eof,\n",
        " Tables.IsSkip,\n",
        " Drive);\n",
        "\n",
        " private static Parser FromStream(Stream stream, int entry) =>\n",
        " new Parser(new Lexer(stream, DfaTables.Lexer, (short)TokenKind.Eof, (short)TokenKind.Error), entry, Config);\n",
        "\n",
    ));

    for (name, _) in &st.entry_states {
        let cap = pascal_case(name);
        // Stream overload.
        writeln!(
            s,
            " /// <summary>Parse the <c>{name}</c> rule from a byte <c>Stream</c>.</summary>"
        )
        .unwrap();
        writeln!(
            s,
            " public static Parser Parse{cap}(Stream stream) => FromStream(stream, Entry{cap});"
        )
        .unwrap();
        // String overload, which forwards to the stream overload.
        writeln!(
            s,
            " /// <summary>Parse the <c>{name}</c> rule from a UTF-8 string.</summary>"
        )
        .unwrap();
        writeln!(
            s,
            " public static Parser Parse{cap}(string src) => Parse{cap}(new MemoryStream(Encoding.UTF8.GetBytes(src)));"
        )
        .unwrap();
        s.push('\n');
    }

    emit_drive(s, st);
    s.push_str("}\n");
}
/// Append the C# `Drive` method to `s`.
///
/// The method is a `while` loop around a `switch` on the current state id.
/// Every state in `st.states` becomes one `case` whose body is the state's ops
/// (rendered by `emit_op`) followed by `break;`. An unknown id hits the
/// throwing `default` arm.
fn emit_drive(s: &mut String, st: &StateTable) {
    s.push_str(concat!(
        " private static void Drive(Parser p) {\n",
        " int cur = p.State();\n",
        " while (p.QueueIsEmpty() && cur != Parser.Terminated) {\n",
        " switch (cur) {\n",
    ));

    for state in st.states.values() {
        // The state's label is emitted as a trailing comment on the case line.
        writeln!(s, " case {}: {{ // {}", state.id, state.label).unwrap();
        for op in &state.ops {
            emit_op(s, st, op, state.id);
        }
        s.push_str(" break;\n");
        s.push_str(" }\n");
    }

    s.push_str(concat!(
        " default: throw new InvalidOperationException($\"unknown state {cur}\");\n",
        " }\n",
        " }\n",
        " p.SetState(cur);\n",
        " }\n",
        "\n",
    ));
}
fn emit_op(s: &mut String, st: &StateTable, op: &Op, self_id: u32) {
match op {
Op::Enter(k) => {
writeln!(s, " p.Enter({});", rule_id(st, *k)).unwrap();
}
Op::Exit(k) => {
writeln!(s, " p.Exit({});", rule_id(st, *k)).unwrap();
}
Op::Expect {
kind,
token_name,
sync,
} => {
writeln!(
s,
" p.TryConsume({}, Tables.Sync{}, \"{}\");",
token_short(st, *kind),
sync,
token_name
)
.unwrap();
}
Op::PushRet(r) => {
writeln!(s, " p.PushRet({});", r).unwrap();
}
Op::Jump(n) => {
writeln!(s, " cur = {};", n).unwrap();
}
Op::Ret => {
writeln!(s, " cur = p.PopRet();").unwrap();
}
Op::Star { first, body, next } => {
writeln!(s, " if (p.MatchesFirst(Tables.First{})) {{ p.PushRet({}); cur = {}; }}", first, self_id, body).unwrap();
writeln!(s, " else cur = {};", next).unwrap();
}
Op::Opt { first, body, next } => {
writeln!(s, " if (p.MatchesFirst(Tables.First{})) {{ p.PushRet({}); cur = {}; }}", first, next, body).unwrap();
writeln!(s, " else cur = {};", next).unwrap();
}
Op::Dispatch { tree, sync, next } => {
emit_dispatch_tree(s, st, tree, *sync, *next, " ");
}
}
}
/// Append the C# code for a lookahead dispatch `tree` to `s`, recursively.
///
/// A leaf becomes a single braced block. A switch node becomes a C# `switch`
/// on `p.Look(depth).Kind` with one `case` per arm (nested switches recurse
/// with a deeper indent) plus a `default` arm. `sync` and `next` are forwarded
/// unchanged to every leaf; `ind` is the indent prefix for this level.
fn emit_dispatch_tree(
    s: &mut String,
    st: &StateTable,
    tree: &DispatchTree,
    sync: u32,
    next: u32,
    ind: &str,
) {
    match tree {
        DispatchTree::Leaf(leaf) => {
            s.push_str(ind);
            s.push_str("{ ");
            emit_leaf_inline(s, leaf, sync, next);
            s.push_str("}\n");
        }
        DispatchTree::Switch {
            depth,
            arms,
            default: fallback,
        } => {
            let arm_ind = format!("{} ", ind);
            writeln!(s, "{}switch (p.Look({}).Kind) {{", ind, depth).unwrap();

            for (kind, sub) in arms {
                if let DispatchTree::Leaf(leaf) = sub {
                    // Leaf arms fit on a single line.
                    write!(s, "{}case (short){}: {{ ", arm_ind, *kind).unwrap();
                    emit_leaf_inline(s, leaf, sync, next);
                    s.push_str("break; }\n");
                } else {
                    // Nested switches get their own block, one indent deeper.
                    writeln!(s, "{}case (short){}: {{", arm_ind, *kind).unwrap();
                    let nested_ind = format!("{} ", arm_ind);
                    emit_dispatch_tree(s, st, sub, sync, next, &nested_ind);
                    writeln!(s, "{} break;", arm_ind).unwrap();
                    writeln!(s, "{}}}", arm_ind).unwrap();
                }
            }

            write!(s, "{}default: {{ ", arm_ind).unwrap();
            emit_leaf_inline(s, fallback, sync, next);
            s.push_str("break; }\n");
            writeln!(s, "{}}}", ind).unwrap();
        }
    }
}
fn emit_leaf_inline(s: &mut String, leaf: &DispatchLeaf, sync: u32, next: u32) {
match leaf {
DispatchLeaf::Arm(t) => write!(s, "p.PushRet({}); cur = {}; ", next, t).unwrap(),
DispatchLeaf::Fallthrough => write!(s, "cur = {}; ", next).unwrap(),
DispatchLeaf::Error => write!(
s,
"cur = {}; p.ErrorHere(\"unexpected token\"); p.RecoverTo(Tables.Sync{}); ",
next, sync
)
.unwrap(),
}
}