use std::fmt::Write;
use std::path::PathBuf;
use crate::codegen::common::{pascal, screaming_snake};
use crate::codegen::EmittedFile;
use crate::lowering::{DispatchLeaf, DispatchTree, Op, StateTable};
/// Renders the Java backend for `st` as a single `Grammar.java` source file.
///
/// The Java package name is the grammar name, or `parser` when the grammar
/// name is empty. The returned vector always holds exactly one
/// [`EmittedFile`], located at `<package>/Grammar.java`.
pub fn emit(st: &StateTable) -> Vec<EmittedFile> {
    // Import section of the generated file: one complete Java line per entry,
    // listed in the order they are written out.
    const JAVA_IMPORTS: &[&str] = &[
        "import java.io.ByteArrayInputStream;",
        "import java.io.InputStream;",
        "import java.nio.charset.StandardCharsets;",
        "import dev.parsuna.runtime.DfaConfig;",
        "import dev.parsuna.runtime.Event;",
        "import dev.parsuna.runtime.Lexer;",
        "import dev.parsuna.runtime.ParseError;",
        "import dev.parsuna.runtime.Parser;",
        "import dev.parsuna.runtime.ParserConfig;",
        "import dev.parsuna.runtime.Pos;",
        "import dev.parsuna.runtime.Span;",
        "import dev.parsuna.runtime.Token;",
    ];

    // An unnamed grammar falls back to the `parser` package.
    let package = match st.grammar_name.as_str() {
        "" => String::from("parser"),
        name => name.to_owned(),
    };

    let mut contents = String::new();

    // File header comment and package declaration.
    contents.push_str("// Generated by parsuna — do not edit by hand.\n");
    contents.push_str("//\n");
    writeln!(
        contents,
        "// Pull-based, recoverable parser. Obtain a {}.Parser from one of the",
        capitalize(&package)
    )
    .unwrap();
    contents.push_str("// static parseXxx factories and iterate it (or call nextEvent directly)\n");
    contents
        .push_str("// to walk the parse as a flat Event stream. Requires dev.parsuna:parsuna-rt.\n");
    writeln!(contents, "package {};", package).unwrap();
    contents.push('\n');

    for line in JAVA_IMPORTS {
        contents.push_str(line);
        contents.push('\n');
    }
    contents.push('\n');

    // Class shell; the section emitters below fill in its members in order.
    contents.push_str(
        "/** Grammar-specific state and entry points. Thin wrapper around dev.parsuna.runtime.Parser. */\n",
    );
    contents.push_str("public final class Grammar {\n");
    contents.push_str(" private Grammar() {}\n");
    contents.push('\n');
    emit_constants(&mut contents, st);
    emit_dfa(&mut contents, st);
    emit_tables(&mut contents, st);
    emit_drive(&mut contents, st);
    emit_public_api(&mut contents, st);
    contents.push_str("}\n");

    let mut path = PathBuf::from(&package);
    path.push("Grammar.java");
    vec![EmittedFile { path, contents }]
}
/// Appends the Java `TokenKind` and `RuleKind` enums to `s`.
///
/// `TokenKind` always starts with the fixed `EOF(0)` and `ERROR(-1)` entries,
/// followed by one entry per token in `st.tokens` carrying that token's
/// `kind`. `RuleKind` gets one entry per name in `st.rule_kinds`, numbered by
/// position. Both enums expose a `displayName()` that maps each constant back
/// to the original (un-mangled) name.
fn emit_constants(s: &mut String, st: &StateTable) {
    // ---- TokenKind ------------------------------------------------------
    s.push_str(" /**\n");
    s.push_str(" * Token kinds this grammar can emit. EOF/ERROR are runtime sentinels;\n");
    s.push_str(" * everything else is a grammar-declared token.\n");
    s.push_str(" */\n");
    s.push_str(" public static enum TokenKind {\n");
    // The two sentinels come first; every grammar token is appended with a
    // leading ",\n" so the list never ends in a dangling comma.
    s.push_str(" EOF(0),\n ERROR(-1)");
    for tok in &st.tokens {
        write!(s, ",\n {}({})", screaming_snake(&tok.name), tok.kind).unwrap();
    }
    s.push_str(";\n");
    s.push_str(" public final short id;\n");
    s.push_str(" TokenKind(int id) { this.id = (short) id; }\n");
    s.push_str(" public String displayName() {\n");
    s.push_str(" switch (this) {\n");
    s.push_str(" case EOF: return \"EOF\";\n");
    s.push_str(" case ERROR: return \"ERROR\";\n");
    for tok in &st.tokens {
        writeln!(
            s,
            " case {}: return \"{}\";",
            screaming_snake(&tok.name),
            tok.name
        )
        .unwrap();
    }
    s.push_str(" default: return name();\n");
    s.push_str(" }\n");
    s.push_str(" }\n");
    s.push_str(" }\n");
    s.push('\n');

    // ---- RuleKind -------------------------------------------------------
    s.push_str(" /**\n");
    s.push_str(" * One variant per non-fragment grammar rule. Attached to Event.Enter/Exit\n");
    s.push_str(" * (via their `rule()` component) to identify each subtree in the stream.\n");
    s.push_str(" */\n");
    s.push_str(" public static enum RuleKind {\n");
    let variants: Vec<String> = st
        .rule_kinds
        .iter()
        .enumerate()
        .map(|(index, rule)| format!(" {}({})", screaming_snake(rule), index))
        .collect();
    if variants.is_empty() {
        // A constant-less Java enum still needs the `;` before its members.
        s.push_str(" ;\n");
    } else {
        s.push_str(&variants.join(",\n"));
        s.push_str(";\n");
    }
    s.push_str(" public final int id;\n");
    s.push_str(" RuleKind(int id) { this.id = id; }\n");
    s.push_str(" public String displayName() {\n");
    s.push_str(" switch (this) {\n");
    for rule in &st.rule_kinds {
        writeln!(
            s,
            " case {}: return \"{}\";",
            screaming_snake(rule),
            rule
        )
        .unwrap();
    }
    s.push_str(" default: return name();\n");
    s.push_str(" }\n");
    s.push_str(" }\n");
    s.push_str(" }\n");
    s.push('\n');
}
/// Returns the Java expression for the `short` id of token kind `kind`.
///
/// Kind `0` maps to `TokenKind.EOF.id` and `-1` to `TokenKind.ERROR.id`; any
/// other kind is looked up in `st.tokens` and rendered as
/// `TokenKind.<SCREAMING_NAME>.id`.
///
/// # Panics
///
/// Panics when `kind` is neither a sentinel nor the kind of any token in
/// `st.tokens`.
fn token_short(st: &StateTable, kind: i16) -> String {
    match kind {
        0 => "TokenKind.EOF.id".to_string(),
        -1 => "TokenKind.ERROR.id".to_string(),
        _ => {
            let tok = st
                .tokens
                .iter()
                .find(|candidate| candidate.kind == kind)
                .unwrap_or_else(|| {
                    panic!("unknown token id {} while emitting Java backend", kind)
                });
            format!("TokenKind.{}.id", screaming_snake(&tok.name))
        }
    }
}
/// Returns the Java expression `RuleKind.<SCREAMING_NAME>.id` for the rule at
/// index `kind` of `st.rule_kinds`.
///
/// # Panics
///
/// Panics when `kind` is not a valid index into `st.rule_kinds`.
fn rule_id(st: &StateTable, kind: u16) -> String {
    match st.rule_kinds.get(usize::from(kind)) {
        Some(name) => format!("RuleKind.{}.id", screaming_snake(name)),
        None => panic!("unknown rule kind id {} while emitting Java backend", kind),
    }
}
/// Appends the lexer tables to `s`: the flattened `DFA_TRANS` array (one
/// output line per DFA state), the per-state `DFA_ACCEPT` array, the
/// `LEXER_CONFIG` constant built from them plus `dfa.start`, and the
/// `isSkip` predicate covering every token flagged `skip`.
///
/// States without an accepting token are written as `0` in `DFA_ACCEPT`.
fn emit_dfa(s: &mut String, st: &StateTable) {
    // Column whose entry is terminated with "," rather than ", ".
    // NOTE(review): presumably the last column of a 256-wide row — confirm
    // against the DFA's transition-row width.
    const TIGHT_COLUMN: usize = 255;

    let dfa = &st.lexer_dfa;

    s.push_str(" private static final int[] DFA_TRANS = new int[] {\n");
    for state in &dfa.states {
        s.push(' ');
        for (column, target) in state.trans.iter().enumerate() {
            let separator = if column == TIGHT_COLUMN { "," } else { ", " };
            write!(s, "{}{}", target, separator).unwrap();
        }
        s.push('\n');
    }
    s.push_str(" };\n");

    s.push_str(" private static final short[] DFA_ACCEPT = new short[] {\n");
    s.push(' ');
    let state_count = dfa.states.len();
    for (index, state) in dfa.states.iter().enumerate() {
        // Only the final entry drops the space after its comma.
        let separator = if index + 1 == state_count { "," } else { ", " };
        write!(s, "{}{}", state.accept.unwrap_or(0), separator).unwrap();
    }
    s.push('\n');
    s.push_str(" };\n");

    writeln!(
        s,
        " private static final DfaConfig LEXER_CONFIG = new DfaConfig({}, DFA_TRANS, DFA_ACCEPT);",
        dfa.start
    )
    .unwrap();
    s.push('\n');

    // `isSkip` is an OR-chain over the skip tokens, or a constant `false`
    // when the grammar declares none.
    let skip_terms: Vec<String> = st
        .tokens
        .iter()
        .filter(|tok| tok.skip)
        .map(|tok| format!("k == {}", token_short(st, tok.kind)))
        .collect();
    let skip_expr = if skip_terms.is_empty() {
        "false".to_string()
    } else {
        skip_terms.join(" || ")
    };
    writeln!(
        s,
        " private static boolean isSkip(short k) {{ return {}; }}",
        skip_expr
    )
    .unwrap();
    s.push('\n');
}
/// Appends the parser's static tables to `s`: the lookahead constant `K`,
/// one `ENTRY_<NAME>` state id per entry rule, one `FIRST_<i>` array of
/// token-kind sequences per entry of `st.first_sets`, and one `SYNC_<i>`
/// array of token kinds per entry of `st.sync_sets`.
fn emit_tables(s: &mut String, st: &StateTable) {
    /// Renders token kinds as a comma-separated list of Java id expressions.
    fn kind_list<'a>(st: &StateTable, kinds: impl Iterator<Item = &'a i16>) -> String {
        kinds
            .map(|&kind| token_short(st, kind))
            .collect::<Vec<_>>()
            .join(", ")
    }

    writeln!(s, " private static final int K = {};", st.k).unwrap();

    for (name, id) in &st.entry_states {
        let upper = name.to_uppercase();
        writeln!(s, " private static final int ENTRY_{} = {};", upper, id).unwrap();
    }

    for (index, first) in st.first_sets.iter().enumerate() {
        // Each sequence becomes its own brace-delimited inner array.
        let sequences = first
            .iter()
            .map(|seq| format!("{{{}}}", kind_list(st, seq.iter())))
            .collect::<Vec<_>>()
            .join(", ");
        writeln!(
            s,
            " private static final short[][] FIRST_{} = new short[][]{{{}}};",
            index, sequences
        )
        .unwrap();
    }

    for (index, sync) in st.sync_sets.iter().enumerate() {
        let kinds = kind_list(st, sync.iter());
        writeln!(
            s,
            " private static final short[] SYNC_{} = new short[]{{{}}};",
            index, kinds
        )
        .unwrap();
    }

    s.push('\n');
}
/// Appends the Java `drive(Parser p)` method to `s`.
///
/// The generated method loads the parser's state into a local `cur`, then
/// loops on a `switch (cur)` for as long as the parser's queue is empty and
/// `cur` is not `Parser.TERMINATED`, and finally stores `cur` back. Every
/// state in `st.states` contributes one `case`, whose body is the state's
/// ops rendered in order by `emit_op`; unknown state ids throw
/// `IllegalStateException`.
fn emit_drive(s: &mut String, st: &StateTable) {
    s.push_str(" private static void drive(Parser p) {\n");
    s.push_str(" int cur = p.state();\n");
    s.push_str(" while (p.queueIsEmpty() && cur != Parser.TERMINATED) {\n");
    s.push_str(" switch (cur) {\n");

    for state in st.states.values() {
        // The state's label rides along as a trailing Java comment.
        writeln!(s, " case {}: {{ // {}", state.id, state.label).unwrap();
        for op in &state.ops {
            emit_op(s, st, op, state.id);
        }
        s.push_str(" break;\n");
        s.push_str(" }\n");
    }

    s.push_str(" default: throw new IllegalStateException(\"unknown state \" + cur);\n");
    s.push_str(" }\n"); // closes the switch
    s.push_str(" }\n"); // closes the while loop
    s.push_str(" p.setState(cur);\n");
    s.push_str(" }\n"); // closes the method
    s.push('\n');
}
fn emit_op(s: &mut String, st: &StateTable, op: &Op, self_id: u32) {
match op {
Op::Enter(k) => {
writeln!(s, " p.enter({});", rule_id(st, *k)).unwrap();
}
Op::Exit(k) => {
writeln!(s, " p.exit({});", rule_id(st, *k)).unwrap();
}
Op::Expect {
kind,
token_name,
sync,
} => {
writeln!(
s,
" p.tryConsume({}, SYNC_{}, \"{}\");",
token_short(st, *kind),
sync,
token_name
)
.unwrap();
}
Op::PushRet(r) => {
writeln!(s, " p.pushRet({});", r).unwrap();
}
Op::Jump(n) => {
writeln!(s, " cur = {};", n).unwrap();
}
Op::Ret => {
writeln!(s, " cur = p.popRet();").unwrap();
}
Op::Star { first, body, next } => {
writeln!(
s,
" if (p.matchesFirst(FIRST_{})) {{ p.pushRet({}); cur = {}; }}",
first, self_id, body
)
.unwrap();
writeln!(s, " else cur = {};", next).unwrap();
}
Op::Opt { first, body, next } => {
writeln!(
s,
" if (p.matchesFirst(FIRST_{})) {{ p.pushRet({}); cur = {}; }}",
first, next, body
)
.unwrap();
writeln!(s, " else cur = {};", next).unwrap();
}
Op::Dispatch { tree, sync, next } => {
emit_dispatch_tree(s, st, tree, *sync, *next, " ");
}
}
}
/// Appends the Java code for a lookahead dispatch `tree` to `s`.
///
/// A leaf becomes a single braced block. A switch node becomes a Java
/// `switch` on the kind of the lookahead token at `depth`, with one `case`
/// per arm (nested switches recurse with a deeper indent) and a `default`
/// built from the node's default leaf. `sync` and `next` are forwarded
/// unchanged to every leaf; `ind` is the indentation prefix for this level.
fn emit_dispatch_tree(
    s: &mut String,
    st: &StateTable,
    tree: &DispatchTree,
    sync: u32,
    next: u32,
    ind: &str,
) {
    let (depth, arms, default) = match tree {
        DispatchTree::Leaf(leaf) => {
            write!(s, "{}{{ ", ind).unwrap();
            emit_leaf_inline(s, leaf, sync, next);
            s.push_str("}\n");
            return;
        }
        DispatchTree::Switch {
            depth,
            arms,
            default,
        } => (depth, arms, default),
    };

    // Case labels sit one level below the switch, nested trees one further.
    let case_ind = format!("{} ", ind);
    let nested_ind = format!("{} ", case_ind);

    writeln!(s, "{}switch (p.look({}).kind) {{", ind, depth).unwrap();
    for (kind, sub) in arms {
        if let DispatchTree::Leaf(leaf) = sub {
            // Leaf arms fit on a single line.
            write!(s, "{}case (short) {}: {{ ", case_ind, kind).unwrap();
            emit_leaf_inline(s, leaf, sync, next);
            s.push_str("break; }\n");
        } else {
            writeln!(s, "{}case (short) {}: {{", case_ind, kind).unwrap();
            emit_dispatch_tree(s, st, sub, sync, next, &nested_ind);
            writeln!(s, "{} break;", case_ind).unwrap();
            writeln!(s, "{}}}", case_ind).unwrap();
        }
    }
    write!(s, "{}default: {{ ", case_ind).unwrap();
    emit_leaf_inline(s, default, sync, next);
    s.push_str("break; }\n");
    writeln!(s, "{}}}", ind).unwrap();
}
fn emit_leaf_inline(s: &mut String, leaf: &DispatchLeaf, sync: u32, next: u32) {
match leaf {
DispatchLeaf::Arm(t) => write!(s, "p.pushRet({}); cur = {}; ", next, t).unwrap(),
DispatchLeaf::Fallthrough => write!(s, "cur = {}; ", next).unwrap(),
DispatchLeaf::Error => write!(
s,
"cur = {}; p.errorHere(\"unexpected token\"); p.recoverTo(SYNC_{}); ",
next, sync
)
.unwrap(),
}
}
/// Appends the public surface of the generated class to `s`: the shared
/// `CONFIG` constant, the private `fromInputStream` helper, and for every
/// entry rule in `st.entry_states` a pair of `parse<Pascal>` factories — one
/// taking an `InputStream`, one taking a `String` that is UTF-8 encoded and
/// forwarded to the stream overload.
fn emit_public_api(s: &mut String, st: &StateTable) {
    s.push_str(
        " private static final ParserConfig CONFIG = new ParserConfig(K, TokenKind.EOF.id, k -> isSkip(k), Grammar::drive);\n",
    );
    s.push('\n');
    s.push_str(" private static Parser fromInputStream(InputStream in, int entry) {\n");
    s.push_str(
        " return new Parser(new Lexer(in, LEXER_CONFIG, TokenKind.EOF.id, TokenKind.ERROR.id), entry, CONFIG);\n",
    );
    s.push_str(" }\n");
    s.push('\n');

    for (name, _) in &st.entry_states {
        // Java method name and entry-state constant derived from the rule name.
        let method = format!("parse{}", pascal(name));
        let entry_const = format!("ENTRY_{}", name.to_uppercase());

        s.push('\n');
        writeln!(
            s,
            " /** Parse the `{}` rule from an InputStream (read lazily in 16 KiB chunks). */",
            name
        )
        .unwrap();
        writeln!(
            s,
            " public static Parser {}(InputStream in) {{ return fromInputStream(in, {}); }}",
            method, entry_const
        )
        .unwrap();
        writeln!(
            s,
            " /** Parse the `{}` rule from a String (UTF-8 encoded internally). */",
            name
        )
        .unwrap();
        writeln!(
            s,
            " public static Parser {}(String src) {{ return {}(new ByteArrayInputStream(src.getBytes(StandardCharsets.UTF_8))); }}",
            method, method
        )
        .unwrap();
    }
}
/// Returns `s` with its first character upper-cased and the rest unchanged.
///
/// Upper-casing uses the full Unicode mapping, so a single leading character
/// may expand to several (for example `ß` becomes `SS`). An empty input
/// yields an empty string.
fn capitalize(s: &str) -> String {
    let mut rest = s.chars();
    match rest.next() {
        None => String::new(),
        Some(head) => {
            let mut out = String::with_capacity(s.len());
            out.extend(head.to_uppercase());
            // `rest` now points just past the first character.
            out.push_str(rest.as_str());
            out
        }
    }
}