use std::fmt::Write;
use std::path::PathBuf;
use crate::codegen::common::pascal;
use crate::codegen::EmittedFile;
use crate::lowering::{DispatchLeaf, DispatchTree, Op, StateTable};
pub fn emit(st: &StateTable) -> Vec<EmittedFile> {
let mut s = String::new();
emit_header(&mut s, st);
emit_constants(&mut s, st);
emit_dfa(&mut s, st);
emit_tables(&mut s, st);
emit_drive(&mut s, st);
emit_public_api(&mut s, st);
let stem = if st.grammar_name.is_empty() {
"parser".to_string()
} else {
st.grammar_name.clone()
};
vec![EmittedFile {
path: PathBuf::from(format!("{}.go", stem)),
contents: s,
}]
}
/// Appends the file preamble: the generated-code banner, the `package`
/// clause, the import block, and the aliases/constants re-exported from the
/// runtime package (imported as `rt`).
///
/// The package is named after the grammar, or `parser` when the grammar name
/// is empty.
fn emit_header(s: &mut String, st: &StateTable) {
    // Everything that precedes the `package` clause.
    const BANNER: &[&str] = &[
        "// Code generated by parsuna; DO NOT EDIT.",
        "//",
        "// Pull-based, recoverable parser. Call one of the ParseXxx entry points",
        "// and repeatedly call NextEvent on the returned *parsunart.Parser to walk",
        "// the parse as a flat Event stream.",
        "",
    ];
    // Everything that follows the `package` clause.
    const IMPORTS_AND_REEXPORTS: &[&str] = &[
        "",
        "import (",
        "\t\"io\"",
        "",
        "\trt \"parsuna.dev/parsuna-rt-go\"",
        ")",
        "",
        "// Event types re-exported from the runtime.",
        "type Pos = rt.Pos",
        "type Span = rt.Span",
        "type Token = rt.Token",
        "type ParseError = rt.ParseError",
        "type Event = rt.Event",
        "type EventTag = rt.EventTag",
        "const (",
        "\tEvEnter = rt.EvEnter",
        "\tEvExit = rt.EvExit",
        "\tEvToken = rt.EvToken",
        "\tEvError = rt.EvError",
        ")",
        "",
    ];

    let package = match st.grammar_name.as_str() {
        "" => "parser",
        named => named,
    };

    for line in BANNER {
        s.push_str(line);
        s.push('\n');
    }
    writeln!(s, "package {}", package).unwrap();
    for line in IMPORTS_AND_REEXPORTS {
        s.push_str(line);
        s.push('\n');
    }
}
/// Appends the `TokenKind` and `RuleKind` declarations together with their
/// `TokenKindName` / `RuleKindName` lookup functions.
///
/// `TkEof` (0) and `TkError` (-1) are always emitted; every entry of
/// `st.tokens` adds one `Tk<Pascal>` constant carrying its own `kind` value.
/// Rule kinds are numbered by their position in `st.rule_kinds`.
fn emit_constants(s: &mut String, st: &StateTable) {
    // --- token kinds -----------------------------------------------------
    s.push_str(concat!(
        "// TokenKind enumerates every token this grammar can emit.\n",
        "// TkEof marks end-of-input and TkError is produced by the lexer when\n",
        "// no pattern matches at the current position.\n",
        "type TokenKind int16\n",
        "const (\n",
        "\tTkEof TokenKind = 0\n",
        "\tTkError TokenKind = -1\n",
    ));
    for tok in &st.tokens {
        writeln!(s, "\tTk{} TokenKind = {}", pascal(&tok.name), tok.kind).unwrap();
    }
    s.push_str(concat!(
        ")\n",
        "\n",
        "// TokenKindName returns the grammar-declared name of a token kind,\n",
        "// or \"?\" if the kind is not recognised.\n",
        "func TokenKindName(k TokenKind) string {\n",
        "\tswitch k {\n",
        "\tcase TkEof: return \"EOF\"\n",
        "\tcase TkError: return \"ERROR\"\n",
    ));
    for tok in &st.tokens {
        writeln!(s, "\tcase Tk{}: return \"{}\"", pascal(&tok.name), tok.name).unwrap();
    }

    // --- rule kinds ------------------------------------------------------
    s.push_str(concat!(
        "\t}\n",
        "\treturn \"?\"\n",
        "}\n",
        "\n",
        "// RuleKind enumerates this grammar's non-fragment rules.\n",
        "type RuleKind uint16\n",
        "const (\n",
    ));
    for (idx, rule) in st.rule_kinds.iter().enumerate() {
        writeln!(s, "\tRk{} RuleKind = {}", pascal(rule), idx).unwrap();
    }
    s.push_str(concat!(
        ")\n",
        "\n",
        "// RuleKindName returns the grammar-declared name of a rule kind,\n",
        "// or \"?\" if the kind is not recognised.\n",
        "func RuleKindName(k RuleKind) string {\n",
        "\tswitch k {\n",
    ));
    for rule in &st.rule_kinds {
        writeln!(s, "\tcase Rk{}: return \"{}\"", pascal(rule), rule).unwrap();
    }
    s.push_str(concat!("\t}\n", "\treturn \"?\"\n", "}\n", "\n"));
}
/// Returns the Go constant name for the token kind `kind`.
///
/// `0` maps to `TkEof` and `-1` to `TkError`; any other value is looked up in
/// `st.tokens` and rendered as `Tk<Pascal>`.
///
/// # Panics
///
/// Panics if `kind` is neither a built-in kind nor present in `st.tokens`.
fn token_const(st: &StateTable, kind: i16) -> String {
    match kind {
        0 => String::from("TkEof"),
        -1 => String::from("TkError"),
        _ => st
            .tokens
            .iter()
            .find(|tok| tok.kind == kind)
            .map(|tok| format!("Tk{}", pascal(&tok.name)))
            .unwrap_or_else(|| panic!("unknown token id {} while emitting Go backend", kind)),
    }
}
/// Returns the Go constant name (`Rk<Pascal>`) for the rule whose index in
/// `st.rule_kinds` is `kind`.
///
/// # Panics
///
/// Panics if `kind` is out of range for `st.rule_kinds`.
fn rule_const(st: &StateTable, kind: u16) -> String {
    match st.rule_kinds.get(usize::from(kind)) {
        Some(rule) => format!("Rk{}", pascal(rule)),
        None => panic!("unknown rule kind id {} while emitting Go backend", kind),
    }
}
/// Appends the lexer tables: the flat `dfaTrans` array (one source line per
/// DFA state), the `dfaAccept` array, the `lexerConfig` value that ties them
/// together, and the `skipKinds` set with its `isSkip` predicate.
///
/// States without an accepting kind are written as `0` in `dfaAccept`.
fn emit_dfa(s: &mut String, st: &StateTable) {
    let dfa = &st.lexer_dfa;

    // Transition table.
    s.push_str("var dfaTrans = [...]uint32{\n");
    for state in &dfa.states {
        s.push('\t');
        for (col, target) in state.trans.iter().enumerate() {
            // Column 255 drops the trailing space so the row ends in a bare comma.
            // NOTE(review): this presumes each row has 256 entries — confirm.
            let sep = if col == 255 { "," } else { ", " };
            write!(s, "{}{}", target, sep).unwrap();
        }
        s.push('\n');
    }
    s.push_str("}\n");

    // Accept table, all on one line.
    s.push_str("var dfaAccept = [...]uint16{\n\t");
    let state_count = dfa.states.len();
    for (idx, state) in dfa.states.iter().enumerate() {
        let accept = state.accept.unwrap_or(0);
        let sep = if idx + 1 == state_count { "," } else { ", " };
        write!(s, "{}{}", accept, sep).unwrap();
    }
    s.push_str("\n}\n");

    writeln!(
        s,
        "var lexerConfig = rt.DfaConfig{{Start: {}, Trans: dfaTrans[:], Accept: dfaAccept[:]}}",
        dfa.start
    )
    .unwrap();
    s.push('\n');

    // Set of token kinds flagged `skip` in the grammar.
    let skip_entries = st
        .tokens
        .iter()
        .filter(|tok| tok.skip)
        .map(|tok| format!("int16({}): {{}}", token_const(st, tok.kind)))
        .collect::<Vec<String>>()
        .join(", ");
    s.push_str("var skipKinds = map[int16]struct{}{");
    s.push_str(&skip_entries);
    s.push_str("}\n");
    s.push_str("func isSkip(k int16) bool { _, ok := skipKinds[k]; return ok }\n");
    s.push('\n');
}
/// Appends the parser constants and lookup tables: the lookahead constant
/// `K`, one `entry<Name>` constant per entry state, and a `var (...)` block
/// holding every `first_<i>` and `sync_<i>` set.
///
/// Token kinds inside the sets are written by constant name, wrapped in an
/// `int16(...)` conversion.
fn emit_tables(s: &mut String, st: &StateTable) {
    // Renders one token kind as it appears inside the tables.
    let go_kind = |kind: i16| format!("int16({})", token_const(st, kind));

    writeln!(s, "const K = {}", st.k).unwrap();
    for (name, id) in &st.entry_states {
        writeln!(s, "const entry{} = {}", capitalize(name), id).unwrap();
    }
    s.push('\n');

    s.push_str("var (\n");
    for (idx, set) in st.first_sets.iter().enumerate() {
        // Each FIRST set is a list of token sequences.
        let mut rendered = Vec::<String>::new();
        for seq in set.iter() {
            let items: Vec<String> = seq.iter().map(|t| go_kind(*t)).collect();
            rendered.push(format!("{{{}}}", items.join(", ")));
        }
        writeln!(s, "\tfirst_{} = [][]int16{{{}}}", idx, rendered.join(", ")).unwrap();
    }
    for (idx, set) in st.sync_sets.iter().enumerate() {
        let items: Vec<String> = set.iter().map(|t| go_kind(*t)).collect();
        writeln!(s, "\tsync_{} = []int16{{{}}}", idx, items.join(", ")).unwrap();
    }
    s.push_str(")\n\n");
}
/// Appends the Go `drive` function: a loop around a `switch` on the current
/// state, with one `case` per entry of `st.states`.
///
/// Each case is labelled with the state's id, followed by its label in a
/// trailing comment, and contains the statements produced by `emit_op` for
/// that state's ops.
fn emit_drive(s: &mut String, st: &StateTable) {
    s.push_str(concat!(
        "func drive(p *rt.Parser) {\n",
        "\tcur := p.State()\n",
        "\tfor p.QueueIsEmpty() && cur != rt.Terminated {\n",
        "\t\tswitch cur {\n",
    ));

    for state in st.states.values() {
        writeln!(s, "\t\tcase {}: // {}", state.id, state.label).unwrap();
        for op in &state.ops {
            emit_op(s, st, op, state.id);
        }
    }

    s.push_str(concat!(
        "\t\t}\n",
        "\t}\n",
        "\tp.SetState(cur)\n",
        "}\n",
        "\n",
    ));
}
fn emit_op(s: &mut String, st: &StateTable, op: &Op, self_id: u32) {
match op {
Op::Enter(k) => {
writeln!(
s,
"\t\t\tp.Enter(uint16({}))",
rule_const(st, *k)
)
.unwrap();
}
Op::Exit(k) => {
writeln!(
s,
"\t\t\tp.Exit(uint16({}))",
rule_const(st, *k)
)
.unwrap();
}
Op::Expect {
kind,
token_name,
sync,
} => {
writeln!(
s,
"\t\t\tp.TryConsume(int16({}), sync_{}, {:?})",
token_const(st, *kind),
sync,
token_name
)
.unwrap();
}
Op::PushRet(r) => {
writeln!(s, "\t\t\tp.PushRet({})", r).unwrap();
}
Op::Jump(n) => {
writeln!(s, "\t\t\tcur = {}", n).unwrap();
}
Op::Ret => {
writeln!(s, "\t\t\tcur = p.PopRet()").unwrap();
}
Op::Star { first, body, next } => {
writeln!(s, "\t\t\tif p.MatchesFirst(first_{}) {{ p.PushRet({}); cur = {} }} else {{ cur = {} }}", first, self_id, body, next).unwrap();
}
Op::Opt { first, body, next } => {
writeln!(s, "\t\t\tif p.MatchesFirst(first_{}) {{ p.PushRet({}); cur = {} }} else {{ cur = {} }}", first, next, body, next).unwrap();
}
Op::Dispatch { tree, sync, next } => {
emit_dispatch_tree(s, st, tree, *sync, *next, "\t\t\t");
}
}
}
/// Appends the Go code for a dispatch decision tree, indented by `ind`.
///
/// A leaf becomes a single line. A switch node becomes a Go `switch` on
/// `p.Look(depth).Kind` with one `case` per arm (each arm rendered
/// recursively, two tabs deeper) and a `default:` branch holding the node's
/// default leaf. `sync` and `next` are passed unchanged to every leaf.
fn emit_dispatch_tree(
    s: &mut String,
    st: &StateTable,
    tree: &DispatchTree,
    sync: u32,
    next: u32,
    ind: &str,
) {
    let (depth, arms, default) = match tree {
        DispatchTree::Leaf(leaf) => {
            s.push_str(ind);
            emit_leaf_inline(s, leaf, sync, next);
            s.push('\n');
            return;
        }
        DispatchTree::Switch {
            depth,
            arms,
            default,
        } => (depth, arms, default),
    };

    let case_ind = format!("{}\t", ind);
    let body_ind = format!("{}\t", case_ind);

    writeln!(s, "{}switch p.Look({}).Kind {{", ind, depth).unwrap();
    for (kind, sub) in arms {
        writeln!(s, "{}case int16({}):", case_ind, token_const(st, *kind)).unwrap();
        // Leaf and nested-switch arms are both handled by the recursive call.
        emit_dispatch_tree(s, st, sub, sync, next, &body_ind);
    }

    writeln!(s, "{}default:", case_ind).unwrap();
    s.push_str(&body_ind);
    emit_leaf_inline(s, default, sync, next);
    s.push('\n');

    writeln!(s, "{}}}", ind).unwrap();
}
fn emit_leaf_inline(s: &mut String, leaf: &DispatchLeaf, sync: u32, next: u32) {
match leaf {
DispatchLeaf::Arm(t) => {
write!(s, "p.PushRet({}); cur = {}", next, t).unwrap();
}
DispatchLeaf::Fallthrough => {
write!(s, "cur = {}", next).unwrap();
}
DispatchLeaf::Error => {
write!(
s,
"cur = {}; p.ErrorHere(\"unexpected token\"); p.RecoverTo(sync_{})",
next, sync
)
.unwrap();
}
}
}
/// Appends the shared `parserConfig` value and one exported `Parse<Name>`
/// function per entry state.
///
/// Each generated function builds a lexer over the given `io.Reader` and
/// returns a parser that starts at the matching `entry<Name>` constant.
fn emit_public_api(s: &mut String, st: &StateTable) {
    s.push_str(
        "var parserConfig = rt.Config{K: K, EofKind: int16(TkEof), IsSkip: isSkip, Drive: drive}\n",
    );
    s.push('\n');

    for (name, _) in &st.entry_states {
        let cap = capitalize(name);
        write!(
            s,
            concat!(
                "// Parse{cap} returns a Parser that parses the `{name}` rule from r.\n",
                "func Parse{cap}(r io.Reader) *rt.Parser {{\n",
                "\tlex := rt.NewLexer(r, &lexerConfig, int16(TkEof), int16(TkError))\n",
                "\treturn rt.NewParser(lex, entry{cap}, parserConfig)\n",
                "}}\n",
                "\n",
            ),
            cap = cap,
            name = name,
        )
        .unwrap();
    }
}
/// Returns `s` with its first character upper-cased and the rest unchanged.
///
/// The upper-casing is Unicode-aware, so a single leading character may
/// expand to several (for example `ß` becomes `SS`). An empty input yields
/// an empty string.
fn capitalize(s: &str) -> String {
    let mut rest = s.chars();
    match rest.next() {
        None => String::new(),
        Some(head) => {
            let mut out = String::with_capacity(s.len());
            out.extend(head.to_uppercase());
            // `as_str` yields everything after the character just consumed.
            out.push_str(rest.as_str());
            out
        }
    }
}