use std::fmt::Write;
use std::path::PathBuf;
use crate::codegen::common::pascal;
use crate::codegen::EmittedFile;
use crate::lowering::{DispatchLeaf, DispatchTree, Op, StateTable};
/// Generate the TypeScript module for the given state table.
///
/// Each section emitter appends to one shared buffer, in this order: header,
/// imports, constants, lexer DFA, parser tables, the `drive` function, and
/// the public entry points.
///
/// Returns a single [`EmittedFile`] whose path is `<grammar_name>.ts`, or
/// `parser.ts` when the grammar name is empty.
pub fn emit(st: &StateTable) -> Vec<EmittedFile> {
    let mut source = String::new();

    emit_header(&mut source, st);
    emit_imports(&mut source);
    emit_constants(&mut source, st);
    emit_dfa(&mut source, st);
    emit_tables(&mut source, st);
    emit_drive(&mut source, st);
    emit_public_api(&mut source, st);

    // Fall back to a generic file name when the grammar is anonymous.
    let file_stem: &str = if st.grammar_name.is_empty() {
        "parser"
    } else {
        st.grammar_name.as_str()
    };
    let path = PathBuf::from(format!("{}.ts", file_stem));

    vec![EmittedFile {
        path,
        contents: source,
    }]
}
fn emit_header(s: &mut String, _st: &StateTable) {
writeln!(s, "// Generated by parsuna — do not edit by hand.").unwrap();
writeln!(s, "//").unwrap();
writeln!(
s,
"// Pull-based, recoverable parser. Call one of the `parseXxx` entry"
)
.unwrap();
writeln!(
s,
"// points and iterate the returned Parser to walk the parse as a"
)
.unwrap();
writeln!(
s,
"// flat Event stream. Requires the `parsuna-rt` runtime package."
)
.unwrap();
writeln!(s).unwrap();
}
/// Append the `parsuna-rt` import and re-export statements, followed by one
/// blank line.
fn emit_imports(s: &mut String) {
    s.push_str(concat!(
        "import { DfaConfig, Lexer, Parser, TERMINATED } from \"parsuna-rt\";\n",
        "export type { Pos, Span, Token, ParseError, Event } from \"parsuna-rt\";\n",
        "export { errorToString } from \"parsuna-rt\";\n",
        "\n",
    ));
}
/// Append the `TokenKind` and `RuleKind` enums together with their
/// `tokenKindName` / `ruleKindName` lookup functions.
///
/// `TokenKind` always starts with the fixed `Eof = 0` and `Error = -1`
/// members, followed by one member per entry of `st.tokens` using that
/// token's own `kind` value. `RuleKind` members are numbered by their
/// position in `st.rule_kinds`. Both name functions fall back to `"?"`.
fn emit_constants(s: &mut String, st: &StateTable) {
    // Shared tail of both `switch`-based name functions.
    const NAME_FN_TAIL: &str = concat!(" default: return \"?\";\n", " }\n", "}\n", "\n");

    // TokenKind enum.
    s.push_str(concat!(
        "/**\n",
        " * Token kinds this grammar can emit. `Eof`/`Error` are runtime sentinels;\n",
        " * every other variant corresponds to a `token` declaration in the grammar.\n",
        " */\n",
        "export enum TokenKind {\n",
        " Eof = 0,\n",
        " Error = -1,\n",
    ));
    for tok in st.tokens.iter() {
        writeln!(s, " {} = {},", pascal(&tok.name), tok.kind).unwrap();
    }
    s.push_str("}\n\n");

    // tokenKindName(): enum member -> grammar-level token name.
    s.push_str(concat!(
        "/** Human-readable name for a token kind, for diagnostics. */\n",
        "export function tokenKindName(k: TokenKind): string {\n",
        " switch (k) {\n",
        " case TokenKind.Eof: return \"EOF\";\n",
        " case TokenKind.Error: return \"ERROR\";\n",
    ));
    for tok in st.tokens.iter() {
        writeln!(
            s,
            " case TokenKind.{}: return \"{}\";",
            pascal(&tok.name),
            tok.name
        )
        .unwrap();
    }
    s.push_str(NAME_FN_TAIL);

    // RuleKind enum.
    s.push_str(concat!(
        "/**\n",
        " * One variant per non-fragment grammar rule. Attached to enter/exit\n",
        " * events so consumers can identify the rule a subtree corresponds to.\n",
        " */\n",
        "export enum RuleKind {\n",
    ));
    for (index, rule) in st.rule_kinds.iter().enumerate() {
        writeln!(s, " {} = {},", pascal(rule), index).unwrap();
    }
    s.push_str("}\n\n");

    // ruleKindName(): enum member -> grammar-level rule name.
    s.push_str(concat!(
        "/** Human-readable name for a rule kind, for diagnostics. */\n",
        "export function ruleKindName(k: RuleKind): string {\n",
        " switch (k) {\n",
    ));
    for rule in st.rule_kinds.iter() {
        writeln!(s, " case RuleKind.{}: return \"{}\";", pascal(rule), rule).unwrap();
    }
    s.push_str(NAME_FN_TAIL);
}
/// Render a numeric token kind as a TypeScript `TokenKind.<Name>` expression.
///
/// Kind `0` maps to `TokenKind.Eof` and `-1` to `TokenKind.Error`; any other
/// value is looked up in `st.tokens` and rendered with the Pascal-cased
/// token name.
///
/// # Panics
///
/// Panics if `kind` is neither a sentinel nor the kind of any token in
/// `st.tokens`.
fn token_variant(st: &StateTable, kind: i16) -> String {
    match kind {
        0 => "TokenKind.Eof".to_string(),
        -1 => "TokenKind.Error".to_string(),
        _ => {
            let tok = st
                .tokens
                .iter()
                .find(|candidate| candidate.kind == kind)
                .unwrap_or_else(|| {
                    panic!(
                        "unknown token id {} while emitting TypeScript backend",
                        kind
                    )
                });
            format!("TokenKind.{}", pascal(&tok.name))
        }
    }
}
/// Render a rule-kind index as a TypeScript `RuleKind.<Name>` expression,
/// using the Pascal-cased name stored at that index of `st.rule_kinds`.
///
/// # Panics
///
/// Panics if `kind` is out of range for `st.rule_kinds`.
fn rule_variant(st: &StateTable, kind: u16) -> String {
    match st.rule_kinds.get(usize::from(kind)) {
        Some(rule) => format!("RuleKind.{}", pascal(rule)),
        None => panic!(
            "unknown rule kind id {} while emitting TypeScript backend",
            kind
        ),
    }
}
/// Append the lexer tables and configuration.
///
/// Emits, in order:
/// - `DFA_TRANS`: a `Uint32Array` holding every state's transitions, one
///   source line per state;
/// - `DFA_ACCEPT`: a `Uint16Array` with one entry per state, `0` for states
///   that have no accept value;
/// - `LEXER_CONFIG`: the `DfaConfig` tying the start state to both tables;
/// - `SKIP_KINDS`: the set of token kinds whose `skip` flag is set.
fn emit_dfa(s: &mut String, st: &StateTable) {
    let dfa = &st.lexer_dfa;
    let state_count = dfa.states.len();

    writeln!(s, "const DFA_TRANS = new Uint32Array([").unwrap();
    for (i, state) in dfa.states.iter().enumerate() {
        let final_row = i + 1 == state_count;
        write!(s, " ").unwrap();
        // Detect the last cell from the iterator itself rather than assuming a
        // fixed row width, so the separator logic stays correct for any number
        // of transitions per state.
        let mut cells = state.trans.iter().peekable();
        while let Some(t) = cells.next() {
            let final_cell = final_row && cells.peek().is_none();
            if final_cell {
                write!(s, "{}", t).unwrap();
            } else {
                write!(s, "{}, ", t).unwrap();
            }
        }
        writeln!(s).unwrap();
    }
    writeln!(s, "]);").unwrap();

    // Non-accepting states are encoded as 0.
    let accepts: Vec<String> = dfa
        .states
        .iter()
        .map(|state| state.accept.unwrap_or(0).to_string())
        .collect();
    writeln!(s, "const DFA_ACCEPT = new Uint16Array([").unwrap();
    writeln!(s, " {}", accepts.join(", ")).unwrap();
    writeln!(s, "]);").unwrap();

    writeln!(
        s,
        "const LEXER_CONFIG: DfaConfig = {{ start: {}, trans: DFA_TRANS, accept: DFA_ACCEPT }};",
        dfa.start
    )
    .unwrap();

    let skipped: Vec<String> = st
        .tokens
        .iter()
        .filter(|t| t.skip)
        .map(|t| token_variant(st, t.kind))
        .collect();
    writeln!(
        s,
        "const SKIP_KINDS: ReadonlySet<TokenKind> = new Set([{}]);",
        skipped.join(", ")
    )
    .unwrap();
    writeln!(s).unwrap();
}
/// Append the parser's constant tables.
///
/// Emits `K` (from `st.k`), one `ENTRY_<NAME>` constant per entry state
/// (name upper-cased), one `FIRST_<i>` array of token-kind sequences per
/// entry of `st.first_sets`, one `SYNC_<i>` array of token kinds per entry
/// of `st.sync_sets`, and a trailing blank line.
fn emit_tables(s: &mut String, st: &StateTable) {
    writeln!(s, "const K = {};", st.k).unwrap();

    for (rule, state_id) in &st.entry_states {
        let constant = rule.to_uppercase();
        writeln!(s, "const ENTRY_{} = {};", constant, state_id).unwrap();
    }

    for (index, first) in st.first_sets.iter().enumerate() {
        // Each FIRST set is a list of lookahead sequences; render every
        // sequence as its own bracketed array.
        let mut alternatives: Vec<String> = Vec::new();
        for seq in first.iter() {
            let mut kinds: Vec<String> = Vec::new();
            for kind in seq.iter() {
                kinds.push(token_variant(st, *kind));
            }
            alternatives.push(format!("[{}]", kinds.join(", ")));
        }
        writeln!(
            s,
            "const FIRST_{}: readonly (readonly TokenKind[])[] = [{}];",
            index,
            alternatives.join(", ")
        )
        .unwrap();
    }

    for (index, sync) in st.sync_sets.iter().enumerate() {
        let mut kinds: Vec<String> = Vec::new();
        for kind in sync.iter() {
            kinds.push(token_variant(st, *kind));
        }
        writeln!(
            s,
            "const SYNC_{}: readonly TokenKind[] = [{}];",
            index,
            kinds.join(", ")
        )
        .unwrap();
    }

    s.push('\n');
}
/// Append the generated `drive` function.
///
/// The emitted function reads the parser's current state into `cur`, then
/// loops over a `switch (cur)` while the parser's queue is empty and `cur`
/// is not `TERMINATED`, and finally stores `cur` back. Each value of
/// `st.states` becomes one `case`, labelled with the state's `label` in a
/// trailing comment and filled with the state's ops via [`emit_op`].
fn emit_drive(s: &mut String, st: &StateTable) {
    s.push_str(concat!(
        "function drive(p: Parser<TokenKind, RuleKind>): void {\n",
        " let cur = p.getState();\n",
        " while (p.queueIsEmpty() && cur !== TERMINATED) {\n",
        " switch (cur) {\n",
    ));

    for state in st.states.values() {
        let state_id = state.id;
        writeln!(s, " case {}: {{ // {}", state_id, state.label).unwrap();
        for op in &state.ops {
            emit_op(s, st, op, state_id);
        }
        s.push_str(" break;\n");
        s.push_str(" }\n");
    }

    s.push_str(concat!(
        " default: throw new Error(`unknown state ${cur}`);\n",
        " }\n",
        " }\n",
        " p.setState(cur);\n",
        "}\n",
        "\n",
    ));
}
/// Append the TypeScript statement(s) for a single state-machine op.
///
/// `self_id` is the id of the state the op belongs to. It is only used by
/// `Op::Star`, which pushes it as the return target so the body comes back
/// to the same state.
fn emit_op(s: &mut String, st: &StateTable, op: &Op, self_id: u32) {
    match op {
        Op::Enter(rule) => writeln!(s, " p.enter({});", rule_variant(st, *rule)).unwrap(),
        Op::Exit(rule) => writeln!(s, " p.exit({});", rule_variant(st, *rule)).unwrap(),
        Op::Expect {
            kind,
            token_name,
            sync,
        } => {
            let expected = token_variant(st, *kind);
            writeln!(
                s,
                " p.tryConsume({}, SYNC_{}, \"expected {}\");",
                expected, sync, token_name
            )
            .unwrap()
        }
        Op::PushRet(target) => writeln!(s, " p.pushRet({});", target).unwrap(),
        Op::Jump(target) => writeln!(s, " cur = {};", target).unwrap(),
        Op::Ret => s.push_str(" cur = p.popRet();\n"),
        // Repetition: on a FIRST match run the body and return to this state,
        // otherwise continue with `next`.
        Op::Star { first, body, next } => write!(
            s,
            " if (p.matchesFirst(FIRST_{})) {{ p.pushRet({}); cur = {}; }}\n else cur = {};\n",
            first, self_id, body, next
        )
        .unwrap(),
        // Optional: on a FIRST match run the body once and return to `next`,
        // otherwise go straight to `next`.
        Op::Opt { first, body, next } => write!(
            s,
            " if (p.matchesFirst(FIRST_{})) {{ p.pushRet({}); cur = {}; }}\n else cur = {};\n",
            first, next, body, next
        )
        .unwrap(),
        Op::Dispatch { tree, sync, next } => {
            emit_dispatch_tree(s, st, tree, *sync, *next, " ")
        }
    }
}
/// Append the lookahead dispatch code for `tree`, recursing into nested
/// switches.
///
/// A leaf is written as a single statement line prefixed with `ind`. A
/// switch node becomes `switch (p.look(<depth>).kind)`, with one `case` per
/// arm and a `default` built from the node's default leaf. Leaf arms stay on
/// the `case` line; nested switches get a braced block one level deeper.
/// `sync` and `next` are passed unchanged to every leaf.
fn emit_dispatch_tree(
    s: &mut String,
    st: &StateTable,
    tree: &DispatchTree,
    sync: u32,
    next: u32,
    ind: &str,
) {
    match tree {
        DispatchTree::Leaf(leaf) => {
            s.push_str(ind);
            emit_leaf_inline(s, leaf, sync, next);
            s.push('\n');
        }
        DispatchTree::Switch {
            depth,
            arms,
            default: fallback,
        } => {
            // `case` labels sit one step inside the switch; nested switch
            // bodies sit one further step inside their `case` block.
            let arm_ind = format!("{} ", ind);
            let nested_ind = format!("{} ", arm_ind);

            writeln!(s, "{}switch (p.look({}).kind) {{", ind, depth).unwrap();
            for (kind, sub) in arms {
                let label = token_variant(st, *kind);
                if let DispatchTree::Leaf(leaf) = sub {
                    write!(s, "{}case {}: ", arm_ind, label).unwrap();
                    emit_leaf_inline(s, leaf, sync, next);
                    s.push_str(" break;\n");
                } else {
                    writeln!(s, "{}case {}: {{", arm_ind, label).unwrap();
                    emit_dispatch_tree(s, st, sub, sync, next, &nested_ind);
                    writeln!(s, "{} break;", arm_ind).unwrap();
                    writeln!(s, "{}}}", arm_ind).unwrap();
                }
            }
            write!(s, "{}default: ", arm_ind).unwrap();
            emit_leaf_inline(s, fallback, sync, next);
            s.push_str(" break;\n");
            writeln!(s, "{}}}", ind).unwrap();
        }
    }
}
fn emit_leaf_inline(s: &mut String, leaf: &DispatchLeaf, sync: u32, next: u32) {
match leaf {
DispatchLeaf::Arm(t) => write!(s, "p.pushRet({}); cur = {};", next, t).unwrap(),
DispatchLeaf::Fallthrough => write!(s, "cur = {};", next).unwrap(),
DispatchLeaf::Error => write!(
s,
"cur = {}; p.errorHere(\"unexpected token\"); p.recoverTo(SYNC_{});",
next, sync
)
.unwrap(),
}
}
/// Append `PARSER_CONFIG` and one exported `parse<Rule>` function per entry
/// state.
///
/// Each generated function builds a `Lexer` over the source string and
/// returns a `Parser` started at the matching `ENTRY_<NAME>` constant. The
/// function name uses the Pascal-cased rule name and the constant uses the
/// upper-cased rule name.
fn emit_public_api(s: &mut String, st: &StateTable) {
    s.push_str(concat!(
        "const PARSER_CONFIG = { k: K, eofKind: TokenKind.Eof, ",
        "isSkip: (k: TokenKind) => SKIP_KINDS.has(k), drive };\n",
        "\n",
    ));

    for (rule, _) in &st.entry_states {
        let fn_suffix = pascal(rule);
        let entry_const = rule.to_uppercase();

        writeln!(s, "/** Parse the `{}` rule from a source string. */", rule).unwrap();
        writeln!(
            s,
            "export function parse{}(src: string): Parser<TokenKind, RuleKind> {{",
            fn_suffix
        )
        .unwrap();
        s.push_str(
            " const lex = new Lexer<TokenKind>(src, LEXER_CONFIG, TokenKind.Eof, TokenKind.Error);\n",
        );
        writeln!(
            s,
            " return new Parser<TokenKind, RuleKind>(lex, ENTRY_{}, PARSER_CONFIG);",
            entry_const
        )
        .unwrap();
        s.push_str("}\n\n");
    }
}